Python scipy.stats module: gaussian_kde() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use scipy.stats.gaussian_kde().

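Before the project examples, a minimal sketch of the basic call pattern (added here for orientation, not taken from any of the projects below): fit the estimator on a one-dimensional sample, then evaluate the density on a grid. For d-dimensional data, gaussian_kde expects an array of shape (d, n).

import numpy as np
from scipy.stats import gaussian_kde

# Simulated sample, purely for illustration.
samples = np.random.normal(loc=0.0, scale=1.0, size=500)

# The bandwidth defaults to Scott's rule; pass bw_method='silverman' or a scalar to change it.
kde = gaussian_kde(samples)

# Evaluate the density on a regular grid; kde(grid) is equivalent to kde.evaluate(grid).
grid = np.linspace(samples.min(), samples.max(), 100)
density = kde.evaluate(grid)
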
Project: plotnine    Author: has2k1
def kde_scipy(data, grid, **kwargs):
    """
    Kernel Density Estimation with Scipy

    Parameters
    ----------
    data : numpy.array
        Data points used to compute a density estimator. It
        has `n x p` dimensions, representing n points and p
        variables.
    grid : numpy.array
        Data points at which the density will be estimated. It
        has `m x p` dimensions, representing m points and p
        variables.

    Returns
    -------
    out : numpy.array
        Density estimate. Has `m x 1` dimensions
    """
    kde = gaussian_kde(data.T, **kwargs)
    return kde.evaluate(grid.T)
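A hedged usage sketch for kde_scipy above (the array shapes follow the docstring; the random data and the numpy import are illustrative assumptions, not part of the plotnine source):

import numpy as np

data = np.random.randn(200, 2)   # n x p: 200 points, 2 variables
grid = np.random.randn(50, 2)    # m x p: 50 evaluation points
dens = kde_scipy(data, grid)     # length-m array of density values
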
Project: abcpy    Author: eth-cscs
def plot(samples, path = None, true_value = 5, title = 'ABC posterior'): 
    Bayes_estimate = np.mean(samples, axis = 0)
    theta = true_value
    xmin, xmax = min(samples[:,0]), max(samples[:,0])
    positions = np.linspace(xmin, xmax, samples.shape[0])
    gaussian_kernel = gaussian_kde(samples[:,0].reshape(samples.shape[0],))
    values = gaussian_kernel(positions)
    plt.figure()
    plt.plot(positions,gaussian_kernel(positions))
    plt.plot([theta, theta],[min(values), max(values)+.1*(max(values)-min(values))])
    plt.plot([Bayes_estimate, Bayes_estimate],[min(values), max(values)+.1*(max(values)-min(values))])
    plt.ylim([min(values), max(values)+.1*(max(values)-min(values))])
    plt.xlabel(r'$\theta$')
    plt.ylabel('density')
    #plt.xlim([0,1])
    plt.rc('axes', labelsize=15) 
    plt.legend(loc='best', frameon=False, numpoints=1)
    font = {'size'   : 15}
    plt.rc('font', **font)
    plt.title(title)
    if path is not None :
        plt.savefig(path)
    return plt
Project: lnn    Author: wgao9
def KDE_entropy(x):
    '''
        Estimate the entropy H(X) from samples {x_i}_{i=1}^N
        Using Kernel Density Estimator (KDE) and resubstitution

        Input: x: 2D list of size N*d_x

        Output: one number of H(X)
    '''

    N = len(x)
    d = len(x[0])
    local_est = np.zeros(N)
    # The kernel depends only on the full sample, so build it once outside the loop.
    kernel = sst.gaussian_kde(x.transpose())
    for i in range(N):
        local_est[i] = kernel.evaluate(x[i].transpose())
    # Resubstitution estimate: average of -log(density) at the sample points.
    return -np.mean(np.log(local_est))
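A quick sanity check of KDE_entropy above (added for illustration, not part of the lnn source): for standard normal samples the estimate should be close to the true differential entropy of 0.5*ln(2*pi*e), roughly 1.42 nats.

x = np.random.normal(0.0, 1.0, size=(2000, 1))
print(KDE_entropy(x))   # should come out near 1.4
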
Project: 5th_place_solution_facebook_check_ins    Author: aikinogard
def kde_opt2c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt2c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            kde = gaussian_kde(X_feat, "scott")
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
Project: 5th_place_solution_facebook_check_ins    Author: aikinogard
def kde_opt3c(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt3c model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            kde = gaussian_kde(X_feat, "scott")
            kde = gaussian_kde(X_feat, kde.factor * 0.741379)
            y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
Project: lens    Author: ASIDataScience
def _scipy_bivariate_kde(x, y, bw, gridsize, cut, clip):
    """Compute a bivariate kde using scipy."""
    data = np.c_[x, y]
    kde = stats.gaussian_kde(data.T)
    data_std = data.std(axis=0, ddof=1)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw_x = getattr(kde, "%s_factor" % bw)() * data_std[0]
        bw_y = getattr(kde, "%s_factor" % bw)() * data_std[1]
    elif np.isscalar(bw):
        bw_x, bw_y = bw, bw
    else:
        msg = ("Cannot specify a different bandwidth for each dimension "
               "with the scipy backend. You should install statsmodels.")
        raise ValueError(msg)
    x_support = _kde_support(data[:, 0], bw_x, gridsize, cut, clip[0])
    y_support = _kde_support(data[:, 1], bw_y, gridsize, cut, clip[1])
    xx, yy = np.meshgrid(x_support, y_support)
    z = kde([xx.ravel(), yy.ravel()]).reshape(xx.shape)
    return xx, yy, z
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def _plot(cls, ax, y, style=None, bw_method=None, ind=None,
              column_num=None, stacking_id=None, **kwds):
        from scipy.stats import gaussian_kde
        from scipy import __version__ as spv

        y = remove_na(y)

        if LooseVersion(spv) >= '0.11.0':
            gkde = gaussian_kde(y, bw_method=bw_method)
        else:
            gkde = gaussian_kde(y)
            if bw_method is not None:
                msg = ('bw_method was added in Scipy 0.11.0.' +
                       ' Scipy version in use is %s.' % spv)
                warnings.warn(msg)

        y = gkde.evaluate(ind)
        lines = MPLPlot._plot(ax, ind, y, style=style, **kwds)
        return lines
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def work(self, fig=None, ax=None):
        """Draw a one dimensional kernel density plot.
        You can specify either a figure or an axis to draw on.

        Parameters:
        -----------
        fig: matplotlib figure object
        ax: matplotlib axis object to draw on

        Returns:
        --------
        fig, ax: matplotlib figure and axis objects
        """
        if ax is None:
            if fig is None:
                return fig, ax
            else:
                ax = fig.gca()
        from scipy.stats import gaussian_kde
        x = self.data[self.aes['x']]
        gkde = gaussian_kde(x)
        ind = np.linspace(x.min(), x.max(), 200)
        ax.plot(ind, gkde.evaluate(ind))
        return fig, ax
Project: wub    Author: nanoporetech
def estimate_mode(acc):
    """ Estimate the mode of a set of float values between 0 and 1.

    :param acc: Data.
    :returns: The mode of the sample
    :rtype: float
    """
    # Taken from sloika.
    if len(acc) > 1:
        da = gaussian_kde(acc)
        optimization_result = minimize_scalar(lambda x: -da(x), bounds=(0, 1), method='Bounded')
        if optimization_result.success:
            try:
                mode = optimization_result.x[0]
            except IndexError:
                mode = optimization_result.x
        else:
            sys.stderr.write("Mode computation failed")
            mode = 0
    else:
        mode = acc[0]
    return mode
Project: ndmg    Author: neurodata
def density(data, nbins=500, rng=None):
    """
    Computes density for metrics which return vectors

    Required parameters:
        data:
            - Dictionary of the vectors of data
    """
    density = OrderedDict()
    xs = OrderedDict()
    for subj in data:
        hist = np.histogram(data[subj], nbins)
        hist = np.max(hist[0])
        dens = gaussian_kde(data[subj])
        if rng is not None:
            xs[subj] = np.linspace(rng[0], rng[1], nbins)
        else:
            xs[subj] = np.linspace(0, np.max(data[subj]), nbins)
        density[subj] = dens.evaluate(xs[subj])*np.max(data[subj]*hist)
    return {"xs": xs, "pdfs": density}
Project: openml-pimp    Author: janvanrijn
def __init__(self, hyperparameter, param_name, data, oob_strategy='resample', bandwith=0.4):
        if oob_strategy not in ['resample', 'round', 'ignore']:
            raise ValueError()
        self.oob_strategy = oob_strategy
        self.param_name = param_name
        self.hyperparameter = hyperparameter
        reshaped = np.reshape(data, (len(data), 1))

        if self.hyperparameter.log:
            if isinstance(self.hyperparameter, UniformIntegerHyperparameter):
                # self.probabilities = {val: self.distrib.pdf(np.log2(val)) for val in range(self.hyperparameter.lower, self.hyperparameter.upper)}
                raise ValueError('Log Integer hyperparameter not supported: %s' %param_name)
            # self.distrib = gaussian_kde(np.log2(data))
            # self.distrib = KernelDensity(kernel='gaussian').fit(np.log2(np.reshape(data, (len(data), 1))))
            self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(np.log2(reshaped))
        else:
            # self.distrib = gaussian_kde(data)
            self.distrib = KernelDensity(kernel='gaussian', bandwidth=bandwith).fit(reshaped)
        pass
Project: Parallel-SGD    Author: angadgill
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        bt = BallTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old version of scipy, doesn't accept "
                           "explicit bandwidth.")

        dens_bt = bt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)

        assert_array_almost_equal(dens_bt, dens_gkde, decimal=3)
Project: Parallel-SGD    Author: angadgill
def test_gaussian_kde(n_samples=1000):
    # Compare gaussian KDE results to scipy.stats.gaussian_kde
    from scipy.stats import gaussian_kde
    np.random.seed(0)
    x_in = np.random.normal(0, 1, n_samples)
    x_out = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        kdt = KDTree(x_in[:, None])
        try:
            gkde = gaussian_kde(x_in, bw_method=h / np.std(x_in))
        except TypeError:
            raise SkipTest("Old scipy, does not accept explicit bandwidth.")

        dens_kdt = kdt.kernel_density(x_out[:, None], h) / n_samples
        dens_gkde = gkde.evaluate(x_out)

        assert_array_almost_equal(dens_kdt, dens_gkde, decimal=3)
Project: bnn-analysis    Author: myshkov
def kl_divergence(p_samples, q_samples):
    # estimate densities
    # p_samples = np.nan_to_num(p_samples)
    # q_samples = np.nan_to_num(q_samples)

    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples

        if idx not in _cached_p_pdf:
            _cached_p_pdf[idx] = sc.gaussian_kde(p_samples)

        p_pdf = _cached_p_pdf[idx]
    else:
        p_pdf = sc.gaussian_kde(p_samples)

    q_pdf = sc.gaussian_kde(q_samples)

    # joint support
    left = min(min(p_samples), min(q_samples))
    right = max(max(p_samples), max(q_samples))

    p_samples_num = p_samples.shape[0]
    q_samples_num = q_samples.shape[0]

    # quantise
    lin = np.linspace(left, right, min(max(p_samples_num, q_samples_num), MAX_GRID_POINTS))
    p = p_pdf.pdf(lin)
    q = q_pdf.pdf(lin)

    # KL
    kl = min(sc.entropy(p, q), MAX_KL)

    return kl
Project: bnn-analysis    Author: myshkov
def ci_metrics(p_samples, q_samples, alpha=.95):
    if isinstance(p_samples, tuple):
        idx, p_samples = p_samples

    # compute CIs
    p_left, p_right = _compute_ci(p_samples, alpha)
    q_left, q_right = _compute_ci(q_samples, alpha)

    precision = 0
    iou = 0
    recall = 0
    f1 = 0

    if (p_right > q_left) and (q_right > p_left):
        # intersection
        int_left = max(q_left, p_left)
        int_right = min(p_right, q_right)

        union_left = min(p_left, q_left)
        union_right = max(p_right, q_right)

        iou = (int_right - int_left) / (union_right - union_left)

        precision = (int_right - int_left) / (q_right - q_left)
        recall = (int_right - int_left) / (p_right - p_left)

        f1 = 2 * precision * recall / (precision + recall)

        # estimate densities
        # p_pdf = sc.gaussian_kde(p_samples)
        # q_pdf = sc.gaussian_kde(q_samples)
        #
        # precision = q_pdf.integrate_box(int_left, int_right) / alpha
        # recall = p_pdf.integrate_box(int_left, int_right) / alpha

    return iou, precision, recall, f1
Project: answer-triggering    Author: jiez-osu
def visualize_distribution(data):
    """ Visualize bucket distribution, a distribution over:
    (sentence_number, question_length, sentence_length)"""
    bucket_distribution = []
    for qid, value in data.items():
      q_length = len(value['question'])
      s_number = len(value['sentences'])
      s_length = max([len(sent) for sent in value['sentences'].values()])
      bucket_distribution.append([s_number, q_length, s_length])

    # pdb.set_trace()
    distribution = np.transpose(np.array(bucket_distribution))
    kde = stats.gaussian_kde(distribution)
    density = kde(distribution)

    idx = density.argsort()
    x = distribution[0, idx]
    y = distribution[1, idx]
    z = distribution[2, idx]
    density = density[idx]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(x, y, z, c=density)
    ax.set_xlabel('sentence number')
    ax.set_ylabel('question length')
    ax.set_zlabel('sentence length')
    plt.show()
Project: uncover-ml    Author: GeoscienceAustralia
def fit(self, y):

        self.lb = y.min() - 1e6
        self.ub = y.max() + 1e6
        self.kde = gaussian_kde(y)
Project: expan    Author: zalando
def bayes_factor(x, y, distribution='normal', num_iters=25000, inference='sampling'):
    """
    Args:
        x (array_like): sample of a treatment group
        y (array_like): sample of a control group
        distribution: name of the KPI distribution model, which assumes a
            Stan model file with the same name exists
        num_iters: number of iterations of bayes sampling
        inference: sampling or variational inference method for approximating the posterior

    Returns:
        dictionary with statistics
    """
    traces, n_x, n_y, mu_x, mu_y = _bayes_sampling(x, y, distribution=distribution, num_iters=num_iters,
                                                   inference=inference)
    trace_normalized_effect_size = get_trace_normalized_effect_size(distribution, traces)
    trace_absolute_effect_size = traces['delta']

    kde = gaussian_kde(trace_normalized_effect_size)
    prior = cauchy.pdf(0, loc=0, scale=1)
    # BF_01
    bf = kde.evaluate(0)[0] / prior
    stop = bf > 3 or bf < 1 / 3.

    credibleMass = 0.95                # another magic number
    leftOut      = 1.0 - credibleMass
    p1           = round(leftOut/2.0, 5)
    p2           = round(1.0 - leftOut/2.0, 5)
    credible_interval = HDI_from_MCMC(trace_absolute_effect_size, credibleMass)

    return {'stop'                  : bool(stop),
            'delta'                 : float(mu_x - mu_y),
            'confidence_interval'   : [{'percentile': p*100, 'value': v} for p, v in zip([p1, p2], credible_interval)],
            'treatment_sample_size' : int(n_x),
            'control_sample_size'   : int(n_y),
            'treatment_mean'        : float(mu_x),
            'control_mean'          : float(mu_y),
            'number_of_iterations'  : num_iters}
Project: f1-communities    Author: GiulioRossetti
def __plot_scatter(prl, max_points=None, fileout="PR_scatter.png", title="Precision Recall Scatter Plot"):
        """

        :param prl: list of tuples (precision, recall)
        :param max_points: max number of tuples to plot
        :param fileout: output filename
        :param title: plot title
        """

        prs = [i[0] for i in prl]
        recs = [i[1] for i in prl]

        if max_points is not None:
            prs = prs[:max_points]
            recs = recs[:max_points]

        xy = np.vstack([prs, recs])
        z = gaussian_kde(xy)(xy)
        x = np.array(prs)
        y = np.array(recs)
        base = min(z)
        rg = max(z) - base
        z = np.array(z)
        idx = z.argsort()
        x, y, z = x[idx], y[idx], (z[idx] - base) / rg
        fig, ax = plt.subplots()
        sca = ax.scatter(x, y, c=z, s=50, edgecolor='', cmap=plt.cm.jet)
        fig.colorbar(sca)
        plt.ylabel("Recall", fontsize=20, labelpad=15)
        plt.xlabel("Precision", fontsize=20)
        plt.ylim([-0.01, 1.01])
        plt.xlim([-0.01, 1.01])
        plt.title(title)
        if matplotlib.get_backend().lower() in ['agg', 'macosx']:
            fig.set_tight_layout(True)
        else:
            fig.tight_layout()

        plt.savefig("%s" % fileout)
Project: orange3-timeseries    Author: biolab
def setHistogram(self, values=None, bins=None, use_kde=False, histogram=None):
        """ Set background histogram (or density estimation, violin plot)

        The histogram of bins is calculated from values, optionally as a
        Gaussian KDE. If histogram is provided, its values are used directly
        and other parameters are ignored.
        """
        if (values is None or not len(values)) and histogram is None:
            self.setPixmap(None)
            return
        if histogram is not None:
            self._histogram = hist = histogram
        else:
            if bins is None:
                bins = min(100, max(10, len(values) // 20))
            if use_kde:
                hist = gaussian_kde(values,
                                    None if isinstance(use_kde, bool) else use_kde)(
                    np.linspace(np.min(values), np.max(values), bins))
            else:
                hist = np.histogram(values, bins)[0]
            self._histogram = hist = hist / hist.max()

        HEIGHT = self.rect().height() / 2
        OFFSET = HEIGHT * .3
        pixmap = QPixmap(QSize(len(hist), 2 * (HEIGHT + OFFSET)))  # +1 avoids right/bottom frame border shadow
        pixmap.fill(Qt.transparent)
        painter = QPainter(pixmap)
        painter.setPen(QPen(Qt.darkGray))
        for x, value in enumerate(hist):
            painter.drawLine(x, HEIGHT * (1 - value) + OFFSET,
                             x, HEIGHT * (1 + value) + OFFSET)

        if self.orientation() != Qt.Horizontal:
            pixmap = pixmap.transformed(QTransform().rotate(-90))

        self.setPixmap(pixmap)
Project: lddmm-ot    Author: jeanfeydy
def make_kde(self):
        """
        Makes the kernel density estimation(s) for create_distplot().

        This is called when curve_type = 'kde' in create_distplot().

        :rtype (list) curve: list of kde representations
        """
        curve = [None] * self.trace_number
        for index in range(self.trace_number):
            self.curve_x[index] = [self.start[index] +
                                   x * (self.end[index] - self.start[index])
                                   / 500 for x in range(500)]
            self.curve_y[index] = (scipy.stats.gaussian_kde
                                   (self.hist_data[index])
                                   (self.curve_x[index]))

            if self.histnorm == ALTERNATIVE_HISTNORM:
                self.curve_y[index] *= self.bin_size[index]

        for index in range(self.trace_number):
            curve[index] = dict(type='scatter',
                                x=self.curve_x[index],
                                y=self.curve_y[index],
                                xaxis='x1',
                                yaxis='y1',
                                mode='lines',
                                name=self.group_labels[index],
                                legendgroup=self.group_labels[index],
                                showlegend=False if self.show_hist else True,
                                marker=dict(color=self.colors[index]))
        return curve
Project: CAAPR    Author: Stargrazer82301
def getDensityKernel(x,y):

    # filter NaNs and infinite numbers
    idx = (np.isfinite(x) * np.isfinite(y))
    x = x[idx]
    y = y[idx]

    # Calculate the point density
    xy = np.vstack([x,y])
    z = gaussian_kde(xy)(xy)
    idx = z.argsort()
    return x[idx], y[idx], z[idx]

# RADIAL PROFILES
Project: Orbit-Fitting    Author: jacob-i-skinner
def maximize(samples):
    '''
    Use Kernel Density Estimation to find a continuous PDF
    from the discrete sampling. Maximize that distribution.

    Parameters
    ----------
    samples : list (shape = (nsamples, ndim))
        Observations drawn from the distribution which is
        going to be fit.

    Returns
    -------
    maximum : list(ndim)
        Maxima of the probability distributions along each axis.
    '''
    from scipy.optimize import minimize
    from scipy.stats import gaussian_kde as kde

    # Give the samples array the proper shape.
    samples = np.transpose(samples)

    # Estimate the continuous PDF.
    estimate = kde(samples)

    # Negate the estimate so that minimizing it finds the maximum of the density.
    def PDF(x):
        return -estimate(x)

    # Initial guess on maximum is made from 50th percentile of the sample.
    p0 = [np.percentile(samples[i], 50) for i in range(samples.shape[0])]

    # Calculate the maximum of the distribution.
    maximum = minimize(PDF, p0).x

    return maximum
Project: 5th_place_solution_facebook_check_ins    Author: aikinogard
def kde_opt4(df_cell_train_feats, y_train, df_cell_test_feats):
    def prepare_feats(df):
        df_new = pd.DataFrame()
        df_new["hour"] = df["hour"]
        df_new["weekday"] = df["weekday"] + df["hour"] / 24.
        df_new["accuracy"] = df["accuracy"].apply(lambda x: np.log10(x))
        df_new["x"] = df["x"]
        df_new["y"] = df["y"]
        return df_new
    logging.info("train kde_opt4 model")
    df_cell_train_feats_kde = prepare_feats(df_cell_train_feats)
    df_cell_test_feats_kde = prepare_feats(df_cell_test_feats)
    n_class = len(np.unique(y_train))
    y_test_pred = np.zeros((len(df_cell_test_feats_kde), n_class), "d")
    for i in range(n_class):
        X = df_cell_train_feats_kde[y_train == i]
        y_test_pred_i = np.ones(len(df_cell_test_feats_kde), "d")
        for feat in df_cell_train_feats_kde.columns.values:
            X_feat = X[feat].values
            BGK10_output = kdeBGK10(X_feat)
            if BGK10_output is None:
                kde = gaussian_kde(X_feat, "scott")
                kde = gaussian_kde(X_feat, kde.factor * 0.741379)
                y_test_pred_i *= kde.evaluate(df_cell_test_feats_kde[feat].values)
            else:
                bandwidth, mesh, density = BGK10_output
                kde = KernelDensity(kernel='gaussian', metric='manhattan', bandwidth=bandwidth)
                kde.fit(X_feat[:, np.newaxis])
                y_test_pred_i *= np.exp(kde.score_samples(df_cell_test_feats_kde[feat].values[:, np.newaxis]))
        y_test_pred[:, i] += y_test_pred_i
    return y_test_pred
Project: pumil    Author: levelfour
def weighted_kde(Bn, conf):
  # N.B. apply WKDE function to the instance multiplied by its confidence value
  weighted_ins = np.vstack([conf[i] * B.data() for i, B in enumerate(Bn)])
  return stats.gaussian_kde(weighted_ins.T)
Project: lens    Author: ASIDataScience
def _scipy_univariate_kde(data, bw, gridsize, cut, clip):
    """Compute a univariate kernel density estimate using scipy."""
    kde = stats.gaussian_kde(data, bw_method=bw)
    if isinstance(bw, string_types):
        bw = "scotts" if bw == "scott" else bw
        bw = getattr(kde, "%s_factor" % bw)() * np.std(data)
    grid = _kde_support(data, bw, gridsize, cut, clip)
    y = kde(grid)
    return grid, y
Project: CElegansBehaviour    Author: ChristophKirst
def plot_distributions(data, cmap = plt.cm.Spectral_r, percentiles =  [5, 25, 50, 75, 95], percentiles_colors = ['gray', 'gray', 'red', 'gray', 'gray']):
  """Plots the data point color as local density"""
  npoints, ntimes = data.shape;

  for t in range(ntimes):
    cols = gaussian_kde(data[:,t])(data[:,t]);
    idx = cols.argsort();

    plt.scatter(np.ones(npoints)*t, data[idx,t], c=cols[idx], s=30, edgecolor='face', cmap = cmap)

  pct = np.percentile(data, percentiles, axis = 0); 
  for i in range(len(percentiles)):
    #plt.plot(iqr[s0][i,:],  c = plt.cm.Spectral(i/5.0), linewidth = 3);
    plt.plot(pct[i,:], c = percentiles_colors[i], linewidth = 2);
Project: CElegansBehaviour    Author: ChristophKirst
def plot_embedding_contours(Y, contours = 10, cmap = plt.cm.plasma, xmin = None, xmax = None, ymin = None, ymax = None, npts = 100, density = False):
  """Plot a 2d density map of the embedding Y"""

  if xmin is None:
    xmin = np.min(Y[:,0]);
  if xmax is None:
    xmax = np.max(Y[:,0]);
  if ymin is None:
    ymin = np.min(Y[:,1]);
  if ymax is None:
    ymax = np.max(Y[:,1]);   

  #print xmin,xmax,ymin,ymax
  dx = float(xmax-xmin) / npts;
  dy = float(ymax-ymin) / npts;
  xx, yy = np.mgrid[xmin:xmax:dx, ymin:ymax:dy]
  positions = np.vstack([xx.ravel(), yy.ravel()])
  kernel = st.gaussian_kde(Y.T);
  #print xx.shape
  #print positions.shape
  #print Y.shape
  #print kernel(positions).shape
  f = kernel(positions)
  f = np.reshape(f, xx.shape)
  #print f.shape

  ax = plt.gca()
  ax.set_xlim(xmin, xmax)
  ax.set_ylim(ymin, ymax)
  # Contourf plot
  if density:
    cfset = ax.contourf(xx, yy, f, cmap=cmap)
  ## Or kernel density estimate plot instead of the contourf plot
  #ax.imshow(np.rot90(f), cmap='Blues', extent=[xmin, xmax, ymin, ymax])
  # Contour plot
  if contours is not None:
    cset = ax.contour(xx, yy, f, contours, cmap=cmap)
  # Label plot
  #ax.clabel(cset, inline=1, fontsize=10)
  ax.set_xlabel('Y0')
  ax.set_ylabel('Y1')
Project: CElegansBehaviour    Author: ChristophKirst
def plot_distributions(data, cmap = plt.cm.Spectral_r, percentiles =  [5, 25, 50, 75, 95], percentiles_colors = ['gray', 'gray', 'red', 'gray', 'gray']):
  """Plots the data point color as local density"""
  npoints, ntimes = data.shape;

  for t in range(ntimes):
    cols = gaussian_kde(data[:,t])(data[:,t]);
    idx = cols.argsort();

    plt.scatter(np.ones(npoints)*t, data[idx,t], c=cols[idx], s=30, edgecolor='face', cmap = cmap)

  pct = np.percentile(data, percentiles, axis = 0); 
  for i in range(len(percentiles)):
    #plt.plot(iqr[s0][i,:],  c = plt.cm.Spectral(i/5.0), linewidth = 3);
    plt.plot(pct[i,:], c = percentiles_colors[i], linewidth = 2);
Project: cellcomplex    Author: VirtualPlants
def violin_plot(figure,X,data,colors,xlabel="",ylabel="",n_points=400,violin_width=None,linewidth=3,marker_size=20):
    font = fm.FontProperties(family = 'Trebuchet', weight ='light')
    #font = fm.FontProperties(family = 'CenturyGothic',fname = '/Library/Fonts/Microsoft/Century Gothic', weight ='light')
    figure.patch.set_facecolor('white')
    axes = figure.add_subplot(111)
    if violin_width is None:
        if len(X)>1:
            violin_width = ((np.array(X)[1:] - np.array(X)[:-1]).mean())/3.
        else:
            violin_width = 0.33
    for x in range(len(X)):
        color = colors[x]

        Y = gaussian_kde(data[x])
        D_smooth = np.linspace(np.percentile(Y.dataset,1),np.percentile(Y.dataset,99),n_points)
        Y_smooth = Y.evaluate(D_smooth)
        Y_smooth = violin_width*Y_smooth/Y_smooth.max()
        axes.fill_betweenx(D_smooth,X[x],X[x]+Y_smooth,facecolor=color,alpha=0.1)
        axes.fill_betweenx(D_smooth,X[x],X[x]-Y_smooth,facecolor=color,alpha=0.1)
        axes.plot(X[x]+Y_smooth,D_smooth,color=color,linewidth=linewidth,alpha=0.8)
        axes.plot(X[x]-Y_smooth,D_smooth,color=color,linewidth=linewidth,alpha=0.8)
        axes.plot([X[x]-Y_smooth[0],X[x]+Y_smooth[0]],[D_smooth[0],D_smooth[0]],color=color,linewidth=linewidth,alpha=0.8)
        axes.plot([X[x]-Y_smooth[-1],X[x]+Y_smooth[-1]],[D_smooth[-1],D_smooth[-1]],color=color,linewidth=linewidth,alpha=0.8)
        axes.plot(X[x]-Y_smooth,D_smooth,color=color,linewidth=linewidth,alpha=0.8)
        axes.plot(X[x],np.percentile(data[x],50),'o',markersize=marker_size,markeredgewidth=linewidth,color=color)
        axes.plot([X[x],X[x]],[np.percentile(data[x],25),np.percentile(data[x],75)],color=color,linewidth=2*linewidth,alpha=0.5)
    axes.set_xlim(min(X)-1,max(X)+1)
    axes.set_xlabel(xlabel,fontproperties=font, size=10, style='italic')
    axes.set_xticklabels(axes.get_xticks(),fontproperties=font, size=12)
    axes.set_ylabel(ylabel, fontproperties=font, size=10, style='italic')
    axes.set_yticklabels(axes.get_yticks(),fontproperties=font, size=12)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    Author: SignalMedia
def work(self, fig=None, ax=None):
        """Draw a two dimensional kernel density plot.
        You can specify either a figure or an axis to draw on.

        Parameters:
        -----------
        fig: matplotlib figure object
        ax: matplotlib axis object to draw on

        Returns:
        --------
        fig, ax: matplotlib figure and axis objects
        """
        if ax is None:
            if fig is None:
                return fig, ax
            else:
                ax = fig.gca()
        x = self.data[self.aes['x']]
        y = self.data[self.aes['y']]

        # TODO: unused?
        # rvs = np.array([x, y])

        x_min = x.min()
        x_max = x.max()
        y_min = y.min()
        y_max = y.max()
        X, Y = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
        positions = np.vstack([X.ravel(), Y.ravel()])
        values = np.vstack([x, y])
        import scipy.stats as stats
        kernel = stats.gaussian_kde(values)
        Z = np.reshape(kernel(positions).T, X.shape)
        ax.contour(Z, extent=[x_min, x_max, y_min, y_max])
        return fig, ax
Project: lang2program    Author: kelvinguu
def plot_pdf(x, cov_factor=None, *args, **kwargs):
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde
    density = gaussian_kde(x)
    xgrid = np.linspace(min(x), max(x), 200)
    if cov_factor is not None:
        density.covariance_factor = lambda: cov_factor
        density._compute_covariance()
    y = density(xgrid)
    plt.plot(xgrid, y, *args, **kwargs)
Project: lang2program    Author: kelvinguu
def plot_pdf(x, cov_factor=None, *args, **kwargs):
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde
    density = gaussian_kde(x)
    xgrid = np.linspace(min(x), max(x), 200)
    if cov_factor is not None:
        density.covariance_factor = lambda: cov_factor
        density._compute_covariance()
    y = density(xgrid)
    plt.plot(xgrid, y, *args, **kwargs)
Project: real_estate    Author: cooperoelrichs
def model_accuracy(results, scatter_lims, error_density_lims, output_file):
        f, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
        ax1.scatter(results['actuals'], results['estimates'])
        max_point = results[['actuals', 'estimates']].max().max()
        ax1.plot([0, max_point], [0, max_point])
        ax1.set_xlim(scatter_lims)
        ax1.set_ylim(scatter_lims)

        density = gaussian_kde(results['error'][results['estimates']>0])
        x = np.linspace(error_density_lims[0], error_density_lims[1], 10000)
        ax2.plot(x, density(x))

        plt.savefig(output_file)
Project: iota    Author: amaneureka
def __init__(self, sample, label=None):
        """Estimates the density function based on a sample.

        sample: sequence of data
        label: string
        """
        self.label = label if label is not None else '_nolegend_'
        self.kde = stats.gaussian_kde(sample)
        low = min(sample)
        high = max(sample)
        self.linspace = np.linspace(low, high, 101)
Project: ndmg    Author: neurodata
def plot_rugdensity(series, name=None, ylab=None, xlab=None):
    if len(series) > 1:
        dens = gaussian_kde(series)
        x = np.linspace(np.min(series), np.max(series), 100)
        y = dens.evaluate(x)*np.max(series)

        d_rug = Scatter(
                    x=series,
                    y=[0]*len(series),
                    mode='markers',
                    marker=Marker(
                             color='rgba(0,0,0,0.9)',
                             symbol='line-ns-open',
                             size=10,
                             opacity=0.5
                           ),
                    name=name
              )
    else:
        x = 0
        y = series

    d_dens = Scatter(
                x=x,
                y=y,
                line=Line(
                       color='rgba(0,0,0,0.9)'
                     ),
                hoverinfo='x',
                name=name,
           )
    if len(series) > 1:
        data = [d_dens, d_rug]
    else:
        data = [d_dens]
    layout = std_layout(name, ylab, xlab)
    fig = Figure(data=data, layout=layout)
    return fig
Project: Machine-Learning-Projects    Author: poke19962008
def featureMapDensity(lang='py', normed=True):
    freq = getValue(lang, type='freq')
    raw = np.array([])

    for i in range(len(freq)):
        raw = np.append(raw, [i]*freq[i])
    print("[SUCCESS] Calculated raw for", lang)

    gaussKDE = gaussian_kde(raw, bw_method=0.5)
    ind = np.linspace(1, 115, 200)

    plt.plot(ind, gaussKDE(ind), label="%s"%lang)
Project: openml-pimp    Author: janvanrijn
def __init__(self, param_priors, param_hyperparameters, n_iter):
        self.hyperparameters = collections.OrderedDict(sorted(param_hyperparameters.items()))
        self.n_iter = n_iter
        self.param_index = []
        self.distributions = {}

        kde_data = None
        for name, hyperparameter in param_hyperparameters.items():
            if isinstance(hyperparameter, UniformFloatHyperparameter):
                self.param_index.append(name)
                data = np.array(param_priors[name])
                if hyperparameter.log:
                    data = np.log2(data)
                if kde_data is None:
                    kde_data = np.reshape(np.array(data), (1, len(data)))
                else:
                    reshaped = np.reshape(np.array(data), (1, len(data)))
                    kde_data = np.concatenate((kde_data, reshaped), axis=0)
            elif isinstance(hyperparameter, UniformIntegerHyperparameter):
                raise ValueError('UniformIntegerHyperparameter not yet implemented:', name)
            elif isinstance(hyperparameter, CategoricalHyperparameter):
                self.distributions[name] = openmlpimp.utils.rv_discrete_wrapper(name, param_priors[name])
            else:
                raise ValueError()
        if len(self.param_index) < 2:
            raise ValueError('Need at least 2 float hyperparameters')
        self.kde = gaussian_kde(kde_data)
Project: ThinkX    Author: AllenDowney
def __init__(self, sample, label=None):
        """Estimates the density function based on a sample.

        sample: sequence of data
        label: string
        """
        self.label = label if label is not None else '_nolegend_'
        self.kde = stats.gaussian_kde(sample)
        low = min(sample)
        high = max(sample)
        self.linspace = np.linspace(low, high, 101)
Project: ThinkX    Author: AllenDowney
def __init__(self, sample, label=None):
        """Estimates the density function based on a sample.

        sample: sequence of data
        label: string
        """
        self.label = label if label is not None else '_nolegend_'
        self.kde = stats.gaussian_kde(sample)
        low = min(sample)
        high = max(sample)
        self.linspace = np.linspace(low, high, 101)
Project: flexCE    Author: bretthandrews
def weighted_kde(self, data, weights, xs):
        data_wt = np.array([data[i] for i in range(len(data))
                            for w in range(weights[i])])
        density = gaussian_kde(data_wt)
        ys = density(xs)
        return ys
Project: tredparse    Author: humanlongevity
def __init__(self, pe):
        self.MINPE = pe.MINPE
        self.x_grid = np.arange(SPAN)
        # Build KDE from global_lens
        kde = gaussian_kde(pe.global_lens)
        pdf = kde.evaluate(self.x_grid)
        #pdf[pdf < SMALL_VALUE] = SMALL_VALUE
        self.pdf = pdf / pdf.sum()
        #self.cdf = np.cumsum(self.pdf)
        self.target_lens = pe.target_lens
        self.ref = pe.ref
        self.db = {}
Project: neurotools    Author: michaelerule
def kdepeak(x, x_grid=None):
    '''
    Kernel density estimate of a 1-D sample.

    Parameters
    ----------
    x : array_like
        Samples whose density is to be estimated.
    x_grid : array_like, optional
        Points at which the density is evaluated. Defaults to a
        201-point grid spanning the range of x.

    Returns
    -------
    x_grid : ndarray
        The evaluation grid.
    density : ndarray
        The estimated density at each grid point.
    '''
    if x_grid is None:
        x_grid = np.linspace(np.min(x),np.max(x),201)
    kde = gaussian_kde(x)
    return x_grid,kde.evaluate(x_grid)
Project: latenttrees    Author: kaltwang
def visual_get_kde(self):
        mu = self.__mu.ravel()
        density = gaussian_kde(mu)
        xs = np.linspace(mu.min(),mu.max(),200)
        density.covariance_factor = lambda : .25
        density._compute_covariance()
        return xs, density(xs)
Project: GRIPy    Author: giruenf
def create_x_pdf(self, where=None, color=None):
        if where is None:
            where = self.good
        if color is None:
            color = '#bbbbbb'

        xmin, xmax = self.xlim
        ymin, ymax = self.ylim

        x = np.linspace(xmin, xmax, 100)

        x_kde = stats.gaussian_kde(self.x[where])
        x_pdf = x_kde(x)
        x_pdf_n = x_pdf/np.nanmax(x_pdf)*(ymax - ymin)*self._FAC + ymin
        """
        x_pdf = self.crossplot_ax.fill_between(x, x_pdf_n, ymin,
                                               color=color, lw=0,
                                               alpha=alpha, zorder=-1)
        """
        x_pdf, = self.crossplot_ax.plot(x, x_pdf_n, color=color[:3],
                                        alpha=color[-1], zorder=-1)
        #"""
        self.x_pdfs.append(x_pdf)

        self.crossplot_ax.set_xlim(*self.xlim)
        self.crossplot_ax.set_ylim(*self.ylim)
Project: GRIPy    Author: giruenf
def create_xy_pdf(self, where=None, color=None):
        if where is None:
            where = self.good
        if color is None:
            color = '#bbbbbb'

        xmin, xmax = self.xlim
        ymin, ymax = self.ylim

        X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]

        positions = np.vstack([X.ravel(), Y.ravel()])
        values = np.vstack([self.x[where], self.y[where]])
        xy_kde = stats.gaussian_kde(values)
        xy_pdf = np.reshape(xy_kde(positions), X.shape)

        """
        vmin = np.nanmin(xy_pdf)
        vmax = np.nanmax(xy_pdf)
        cmap = matplotlib.cm.gray_r
        xy_pdf = self.crossplot_ax.contourf(X, Y, xy_pdf, 20, cmap=cmap,
                                            vmin=vmin, vmax=2*vmax, alpha=alpha,
                                            zorder=-2)
        """
        xy_pdf = self.crossplot_ax.contour(X, Y, xy_pdf, 5, colors=color[:3],
                                           alpha=color[-1], zorder=-2)
        #"""
        self.xy_pdfs.append(xy_pdf)

        self.crossplot_ax.set_xlim(*self.xlim)
        self.crossplot_ax.set_ylim(*self.ylim)
Project: GRIPy    Author: giruenf
def create_x_pdf(self, where=None, color=None):
        if where is None:
            where = self.good
        if color is None:
            color = '#bbbbbb'

        xmin, xmax = self.xlim
        ymin, ymax = self.ylim

        x = np.linspace(xmin, xmax, 100)

        x_kde = stats.gaussian_kde(self.x[where])
        x_pdf = x_kde(x)
        x_pdf_n = x_pdf/np.nanmax(x_pdf)*(ymax - ymin)*self._FAC + ymin
        """
        x_pdf = self.crossplot_ax.fill_between(x, x_pdf_n, ymin,
                                               color=color, lw=0,
                                               alpha=alpha, zorder=-1)
        """
        x_pdf, = self.crossplot_ax.plot(x, x_pdf_n, color=color[:3],
                                        alpha=color[-1], zorder=-1)
        #"""
        self.x_pdfs.append(x_pdf)

        self.crossplot_ax.set_xlim(*self.xlim)
        self.crossplot_ax.set_ylim(*self.ylim)
Project: GRIPy    Author: giruenf
def fit(self, data):
        self.gkde = stats.gaussian_kde(data.T, self.bw)
Project: human-rl    Author: gsastry
def _step(self, action):
        obs, reward, done, info = self.env.step(action)

        location = info.get('location')
        if location is not None:
            """
            self.locations.append(location)
            if len(self.locations) == self.buffer_size:
                # rebuild the kde
                self.kde = stats.gaussian_kde(np.array(self.locations).T, self.bandwidth)

                # plot it?
                dims = obs.shape[:2]
                grid = np.indices(dims)
                kde = self.kde.logpdf(grid.reshape([2, -1]))
                kde = kde.reshape(dims)
                info['kde'] = kde

                #plt.imsave('test.png', kde)

                # drop the older locations
                self.locations = self.locations[self.buffer_size//2:]

            #plt.imsave('counts.png', self.counts)
            #info['logprob'] = logprob

            if self.kde:
                logpdf = self.kde.logpdf(np.array(location))
                info['logpdf'] = logpdf

                reward -= logpdf
            """

            location = location + self.breadth # padding
            index = tuple(location.tolist())

            patch = extract_patch(self.counts, index, self.breadth)
            count = (self.kernel * patch).sum()

            info['log/visits'] = count

            logprob = np.log(count / self.total)
            info['log/visit_logprob'] = logprob

            #reward = 0

            bonus = self.explore_scale * (self.logprob - logprob) 
            info['log/explore_bonus'] = np.abs(bonus)
            reward += bonus

            self.logprob = logprob

            if self.decay:
                self.counts *= self.decay
            else:
                self.total += 1

            self.counts[index] += 1

        return obs, reward, done, info
Project: human-rl    Author: gsastry
def _step(self, action):
        obs, reward, done, info = self.env.step(action)

        location = info.get('location')
        if location is not None:
            """
            self.locations.append(location)
            if len(self.locations) == self.buffer_size:
                # rebuild the kde
                self.kde = stats.gaussian_kde(np.array(self.locations).T, self.bandwidth)

                # plot it?
                dims = obs.shape[:2]
                grid = np.indices(dims)
                kde = self.kde.logpdf(grid.reshape([2, -1]))
                kde = kde.reshape(dims)
                info['kde'] = kde

                #plt.imsave('test.png', kde)

                # drop the older locations
                self.locations = self.locations[self.buffer_size//2:]

            #plt.imsave('counts.png', self.counts)
            #info['logprob'] = logprob

            if self.kde:
                logpdf = self.kde.logpdf(np.array(location))
                info['logpdf'] = logpdf

                reward -= logpdf
            """

            location = location + self.breadth # padding
            index = tuple(location.tolist())

            patch = extract_patch(self.counts, index, self.breadth)
            count = (self.kernel * patch).sum()

            info['log/visits'] = count

            logprob = np.log(count / self.total)
            info['log/visit_logprob'] = logprob

            #reward = 0

            bonus = self.explore_scale * (self.logprob - logprob) 
            info['log/explore_bonus'] = np.abs(bonus)
            reward += bonus

            self.logprob = logprob

            if self.decay:
                self.counts *= self.decay
            else:
                self.total += 1

            self.counts[index] += 1

        return obs, reward, done, info