Python scipy.stats 模块,binom() 实例源码

我们从Python开源项目中,提取了以下17个代码示例,用于说明如何使用scipy.stats.binom()

项目:pyabc    作者:neuralyzer    | 项目源码 | 文件源码
def test_beta_binomial_two_identical_models(db_path, sampler):
    binomial_n = 5

    def model_fun(args):
        return {"result": st.binom(binomial_n, args.theta).rvs()}

    models = [model_fun for _ in range(2)]
    models = list(map(SimpleModel, models))
    population_size = ConstantPopulationSize(800)
    parameter_given_model_prior_distribution = [Distribution(theta=st.beta(
                                                                      1, 1))
                                                for _ in range(2)]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.1),
                 sampler=sampler)
    abc.new(db_path, {"result": 2})

    minimum_epsilon = .2
    history = abc.run(minimum_epsilon, max_nr_populations=3)
    mp = history.get_model_probabilities(history.max_t)
    assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
项目:pyabc    作者:neuralyzer    | 项目源码 | 文件源码
def test_beta_binomial_two_identical_models_adaptive(db_path, sampler):
    binomial_n = 5

    def model_fun(args):
        return {"result": st.binom(binomial_n, args.theta).rvs()}

    models = [model_fun for _ in range(2)]
    models = list(map(SimpleModel, models))
    population_size = AdaptivePopulationSize(800)
    parameter_given_model_prior_distribution = [
        Distribution(theta=st.beta(1, 1)) for _ in range(2)]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.1),
                 sampler=sampler)
    abc.new(db_path, {"result": 2})

    minimum_epsilon = .2
    history = abc.run(minimum_epsilon, max_nr_populations=3)
    mp = history.get_model_probabilities(history.max_t)
    assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
项目:poibin    作者:tsakim    | 项目源码 | 文件源码
def test_pmf_pb_binom():
    """Compare the probability mass function with the binomial limit case."""
    # For equal probabilites p_j, the Poisson Binomial distribution reduces to
    # the Binomial one:
    p = [0.5, 0.5]
    pb = PoiBin(p)
    bn = binom(n=2, p=p[0])

    # Compare to four digits behind the comma
    assert int(bn.pmf(0) * 10000) == int(pb.pmf(0) * 10000)

    # For different probabilities p_j, the Poisson Binomial distribution and
    # the Binomial distribution are different:
    pb = PoiBin([0.5, 0.8])
    bn = binom(2, p=0.5)
    assert int(bn.pmf(0) * 10000) != int(pb.pmf(0) * 10000)
项目:poibin    作者:tsakim    | 项目源码 | 文件源码
def test_cdf_pb_binom():
    """Compare the cumulative distribution function with the binomial limit
    case.
    """
    # For equal probabilites p_j, the Poisson Binomial distribution reduces
    # to the Binomial one:
    p = [0.5, 0.5]
    pb = PoiBin(p)
    bn = binom(n=2, p=p[0])
    # Compare to four digits behind the comma
    assert int(bn.cdf(0) * 10000) == int(pb.cdf(0) * 10000)

    # For different probabilities p_j, the Poisson Binomial distribution and
    # the Binomial distribution are different:
    pb = PoiBin([0.5, 0.8])
    bn = binom(2, p=0.5)
    assert int(bn.cdf(0) * 10000) != int(pb.cdf(0) * 10000)
项目:poibin    作者:tsakim    | 项目源码 | 文件源码
def test_pval_pb_binom():
    """Compare the p-values with the binomial limit case.

    Test that the p-values of the Poisson Binomial distribution are the same
    as the ones of the Binomial distribution when all the probabilities are
    equal.
    """
    pi = np.around(np.random.random_sample(), decimals=2)
    ni = np.random.randint(5, 500)
    pp = [pi for i in range(ni)]
    bn = binom(n=ni, p=pi)
    k = np.random.randint(0, ni)
    pval_bn = 1 - bn.cdf(k) + bn.pmf(k)
    pb = PoiBin(pp)
    pval_pb = pb.pval(k)
    assert np.all(np.around(pval_bn, decimals=10) == np.around(pval_pb,
                                                               decimals=10))

# PoiBin.get_cdf ---------------------------------------------------------------
项目:subreddit-scoring    作者:ririw    | 项目源码 | 文件源码
def score_sub(self, sub_id, given_sums, means, smoothing=0):
        dist = stats.binom(given_sums.sum(), means[sub_id])
        mu = dist.mean()
        sd = dist.std()
        dist = (given_sums[sub_id] - smoothing - mu) / sd

        return dist
项目:subreddit-scoring    作者:ririw    | 项目源码 | 文件源码
def score_sub(sub_id, given_sums, means, smoothing=0):
        dist = stats.binom(given_sums.sum(), means[sub_id])
        mu = dist.mean()
        sd = dist.std()
        dist = (given_sums[sub_id] - mu - smoothing) / sd
        return dist
项目:ngsphy    作者:merlyescalona    | 项目源码 | 文件源码
def value(self,samples=1):
        """
        Samples number of values given from the specific distribution.
        ------------------------------------------------------------------------
        - samples: number of values that will be returned.
        """
        value=0
        try:
            for item in self.__params:
                if item==0: break
            if item==0: value=[0]*samples
            else:
                if self.__name=="b": value=self.binom(samples)
                if self.__name=="e": value=self.exponential(samples)
                if self.__name=="f": value=self.fixed(samples)
                if self.__name=="g": value=self.gamma(samples)
                if self.__name=="g1": value=self.gamma1(samples)
                if self.__name=="ln": value=self.lognormal(samples)
                if self.__name=="n": value=self.normal(samples)
                if self.__name=="nb": value=self.nbinom(samples)
                if self.__name=="p": value=self.poisson(samples)
                if self.__name=="u": value=self.uniform(samples)
        except Exception as ex:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            message="\n\tUnexpected: {0} | {1} - File: {2} - Line:{3}".format(\
                ex,exc_type, fname, exc_tb.tb_lineno)
            status=False
            raise Exception(message)
            # self.appLogger.error(message)
            # sys.exit()

        return value
项目:ngsphy    作者:merlyescalona    | 项目源码 | 文件源码
def binom(self,samples):
        """
        Sampling from a binomial distribution
        ------------------------------------------------------------------------
        - samples: number of values that will be returned.
        """
        #n=number of times tested
        #p=probability
        n=self.__params[0]
        p=self.__params[1]
        distro=binom(n,p)
        f=distro.rvs(size=samples)
        return f
项目:pyabc    作者:neuralyzer    | 项目源码 | 文件源码
def test_beta_binomial_different_priors(db_path, sampler):
    binomial_n = 5

    def model(args):
        return {"result": st.binom(binomial_n, args['theta']).rvs()}

    models = [model for _ in range(2)]
    models = list(map(SimpleModel, models))
    population_size = ConstantPopulationSize(800)
    a1, b1 = 1, 1
    a2, b2 = 10, 1
    parameter_given_model_prior_distribution = [Distribution(theta=RV("beta",
                                                                      a1, b1)),
                                                Distribution(theta=RV("beta",
                                                                      a2, b2))]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.1),
                 sampler=sampler)
    n1 = 2
    abc.new(db_path, {"result": n1})

    minimum_epsilon = .2
    history = abc.run(minimum_epsilon, max_nr_populations=3)
    mp = history.get_model_probabilities(history.max_t)

    def B(a, b):
        return gamma(a) * gamma(b) / gamma(a + b)

    def expected_p(a, b, n1):
        return binom(binomial_n, n1) * B(a + n1, b + binomial_n - n1) / B(a, b)

    p1_expected_unnormalized = expected_p(a1, b1, n1)
    p2_expected_unnormalized = expected_p(a2, b2, n1)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .08
项目:poibin    作者:tsakim    | 项目源码 | 文件源码
def test_pmf_accuracy():
    """Compare accuracy of the probability mass function.

    Compare the results with the accuracy check proposed in [Hong2013]_,
    equation (15).
    """
    [p1, p2, p3] = np.around(np.random.random_sample(size=3), decimals=2)
    [n1, n2, n3] = np.random.random_integers(1, 10, size=3)
    nn = n1 + n2 + n3
    l1 = [p1 for i in range(n1)]
    l2 = [p2 for i in range(n2)]
    l3 = [p3 for i in range(n3)]
    p = l1 + l2 + l3
    b1 = binom(n=n1, p=p1)
    b2 = binom(n=n2, p=p2)
    b3 = binom(n=n3, p=p3)
    k = np.random.randint(0, nn + 1)
    chi_bn = 0
    for j in range(0, k+1):
        for i in range(0, j+1):
            chi_bn += b1.pmf(i) * b2.pmf(j - i) * b3.pmf(k - j)
    pb = PoiBin(p)
    chi_pb = pb.pmf(k)
    assert np.all(np.around(chi_bn, decimals=10) == np.around(chi_pb,
                                                              decimals=10))

# PoiBin.cdf ------------------------------------------------------------------
项目:crayimage    作者:yandexdataschool    | 项目源码 | 文件源码
def test_posterior(self):
    samples = 20
    frames = 250
    bins = 10

    X = np.ndarray(shape=(samples, bins), dtype='uint16')

    for i, a in enumerate(np.linspace(0.0, 1.0, num=samples)):
      dist = stats.binom(bins - 1, a)
      X[i] = np.bincount(dist.rvs(size = frames), minlength=bins)

    grid_size = 200
    parameter_grid = np.linspace(0.0, 0.25, num=grid_size)
    parameter_grid_deltas = parameter_grid[1:] - parameter_grid[:-1]
    parameter_distribution = np.ones(grid_size) / grid_size

    pmf = np.ndarray(shape=(grid_size, bins))

    for i, p in enumerate(parameter_grid):
      pmf[i] = stats.binom(bins-1, p).pmf(np.arange(bins))
      self.assertAlmostEqual(np.sum(pmf[i]), 1.0, delta=1.0e-6)

    p = posterior(X, parameter_grid_deltas, parameter_distribution, pmf)

    print p

    n = samples / 2
    tol = 1.0e-200

    self.assertTrue(
      np.allclose(p[-n:], 0.0, atol=tol)
    )
项目:crayimage    作者:yandexdataschool    | 项目源码 | 文件源码
def test_separable(self):
    bins = 10
    frames = 100

    comp1 = compound_distribution(
      parameter_distribution=stats.uniform(0.0, 0.25),
      signal_family=lambda p: stats.binom(bins - 1, p),
      binarize_signal=False, bins = bins
    )

    comp2 = compound_distribution(
      parameter_distribution=stats.uniform(0.5, 1.0),
      signal_family=lambda p: stats.binom(bins - 1, p),
      binarize_signal=False, bins=bins
    )

    grid1 = np.linspace(0.0, 0.25, num=200)
    grid2 = np.linspace(0.5, 1.0, num=200)

    prior1, prior2 = 0.5, 0.5

    gen = CompoundMC(
      category_priors=[prior1, prior2],
      compounds=[comp1, comp2],
      n_pixels=100, n_frames=frames
    )

    cats, params, X = gen.rvs(size=1)

    clf = FastBayesianClassifier(
      priors=[prior1, prior2],
      compounds=[comp1, comp2],
      parameter_grids=[grid1, grid2]
    )

    y = clf.predict_proba(X)

    print np.sum(np.argmax(cats, axis=1) != y)
项目:mixedvines    作者:asnelt    | 项目源码 | 文件源码
def fit(samples, is_continuous):
        '''
        Fits a distribution to the given samples.

        Parameters
        ----------
        samples : array_like
            Array of samples.
        is_continuous : bool
            If `True` then a continuous distribution is fitted.  Otherwise, a
            discrete distribution is fitted.

        Returns
        -------
        best_marginal : Marginal
            The distribution fitted to `samples`.
        '''
        # Mean and variance
        mean = np.mean(samples)
        var = np.var(samples)
        # Set suitable distributions
        if is_continuous:
            if np.any(samples <= 0):
                options = [norm]
            else:
                options = [norm, gamma]
        else:
            if var > mean:
                options = [poisson, binom, nbinom]
            else:
                options = [poisson, binom]
        params = np.empty(len(options), dtype=object)
        marginals = np.empty(len(options), dtype=object)
        # Fit parameters and construct marginals
        for i, dist in enumerate(options):
            if dist == poisson:
                params[i] = [mean]
            elif dist == binom:
                param_n = np.max(samples)
                param_p = np.sum(samples) / (param_n * len(samples))
                params[i] = [param_n, param_p]
            elif dist == nbinom:
                param_n = mean * mean / (var - mean)
                param_p = mean / var
                params[i] = [param_n, param_p]
            else:
                params[i] = dist.fit(samples)
            rv_mixed = dist(*params[i])
            marginals[i] = Marginal(rv_mixed)
        # Calculate Akaike information criterion
        aic = np.zeros(len(options))
        for i, marginal in enumerate(marginals):
            aic[i] = 2 * len(params[i]) \
                     - 2 * np.sum(marginal.logpdf(samples))
        best_marginal = marginals[np.argmin(aic)]
        return best_marginal
项目:pyabc    作者:neuralyzer    | 项目源码 | 文件源码
def test_beta_binomial_different_priors_initial_epsilon_from_sample(db_path,
                                                                    sampler):
    binomial_n = 5

    def model(args):
        return {"result": st.binom(binomial_n, args.theta).rvs()}

    models = [model for _ in range(2)]
    models = list(map(SimpleModel, models))
    population_size = ConstantPopulationSize(800)
    a1, b1 = 1, 1
    a2, b2 = 10, 1
    parameter_given_model_prior_distribution = [Distribution(theta=RV("beta",
                                                                      a1, b1)),
                                                Distribution(theta=RV("beta",
                                                                      a2, b2))]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(median_multiplier=.9),
                 sampler=sampler)
    n1 = 2
    abc.new(db_path, {"result": n1})

    minimum_epsilon = -1
    history = abc.run(minimum_epsilon, max_nr_populations=5)
    mp = history.get_model_probabilities(history.max_t)

    def B(a, b):
        return gamma(a) * gamma(b) / gamma(a + b)

    def expected_p(a, b, n1):
        return binom(binomial_n, n1) * B(a + n1, b + binomial_n - n1) / B(a, b)

    p1_expected_unnormalized = expected_p(a1, b1, n1)
    p2_expected_unnormalized = expected_p(a2, b2, n1)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)

    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .08
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_stratified_shuffle_split_even():
    # Test the StratifiedShuffleSplit, indices are drawn with a
    # equal chance
    n_folds = 5
    n_iter = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Here we test that the distribution of the counts
        # per index is close enough to a binomial
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            p = bf.pmf(count)
            assert_true(p > threshold,
                        "An index is not drawn with chance corresponding "
                        "to even draws")

    for n_samples in (6, 22):
        labels = np.array((n_samples // 2) * [0, 1])
        splits = StratifiedShuffleSplit(n_iter=n_iter,
                                        test_size=1. / n_folds,
                                        random_state=0)

        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits = 0
        for train, test in splits.split(X=np.ones(n_samples), y=labels):
            n_splits += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits, n_iter)

        n_train, n_test = _validate_shuffle_split(n_samples,
                                                  test_size=1./n_folds,
                                                  train_size=1.-(1./n_folds))

        assert_equal(len(train), n_train)
        assert_equal(len(test), n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        label_counts = np.unique(labels)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(n_train + n_test, len(labels))
        assert_equal(len(label_counts), 2)
        ex_test_p = float(n_test) / n_samples
        ex_train_p = float(n_train) / n_samples

        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)
项目:Parallel-SGD    作者:angadgill    | 项目源码 | 文件源码
def test_stratified_shuffle_split_even():
    # Test the StratifiedShuffleSplit, indices are drawn with a
    # equal chance
    n_folds = 5
    n_iter = 1000

    def assert_counts_are_ok(idx_counts, p):
        # Here we test that the distribution of the counts
        # per index is close enough to a binomial
        threshold = 0.05 / n_splits
        bf = stats.binom(n_splits, p)
        for count in idx_counts:
            p = bf.pmf(count)
            assert_true(p > threshold,
                        "An index is not drawn with chance corresponding "
                        "to even draws")

    for n_samples in (6, 22):
        labels = np.array((n_samples // 2) * [0, 1])
        splits = cval.StratifiedShuffleSplit(labels, n_iter=n_iter,
                                             test_size=1. / n_folds,
                                             random_state=0)

        train_counts = [0] * n_samples
        test_counts = [0] * n_samples
        n_splits = 0
        for train, test in splits:
            n_splits += 1
            for counter, ids in [(train_counts, train), (test_counts, test)]:
                for id in ids:
                    counter[id] += 1
        assert_equal(n_splits, n_iter)

        assert_equal(len(train), splits.n_train)
        assert_equal(len(test), splits.n_test)
        assert_equal(len(set(train).intersection(test)), 0)

        label_counts = np.unique(labels)
        assert_equal(splits.test_size, 1.0 / n_folds)
        assert_equal(splits.n_train + splits.n_test, len(labels))
        assert_equal(len(label_counts), 2)
        ex_test_p = float(splits.n_test) / n_samples
        ex_train_p = float(splits.n_train) / n_samples

        assert_counts_are_ok(train_counts, ex_train_p)
        assert_counts_are_ok(test_counts, ex_test_p)