Python numpy module: percentile() examples

We extracted the following code examples from open-source Python projects to illustrate how to use numpy.percentile().
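
A quick primer before the project snippets: np.percentile(a, q) takes q in percent (0 to 100, scalar or sequence) and reduces the flattened array unless an axis is given; keepdims preserves the reduced axes, and interpolation (renamed method in NumPy 1.22+) controls how values between samples are estimated. A minimal sketch:

import numpy as np

a = np.array([[10, 7, 4],
              [3, 2, 1]])

print(np.percentile(a, 50))                # 3.5 -- scalar q over the flattened array
print(np.percentile(a, [25, 50, 75]))      # one value per requested percentile
print(np.percentile(a, 50, axis=1))        # [7. 2.] -- per-row medians
print(np.percentile(a, 50, axis=1, keepdims=True).shape)  # (2, 1)
print(np.percentile(a, 90, interpolation='nearest'))      # snaps to an actual sample value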

Project: spikefuel    Author: duguyue100
def remove_outliers(idx_ts):
    """Remove outliers from a given timestamps array.

    Parameters
    ----------
    idx_ts : numpy.ndarray
        given timestamp array.

    Returns
    -------
    idx_ts_new : numpy.ndarray.
        outliers removed timestamp array.
    """
    idx_ts_new = idx_ts.copy()
    # Tukey fences: points outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] are outliers.
    q1, _, q3 = np.percentile(idx_ts_new, [25, 50, 75])
    iqr = q3 - q1
    low_lim = q1 - 1.5 * iqr
    high_lim = q3 + 1.5 * iqr

    idx_ts_new = idx_ts_new[idx_ts_new <= high_lim]
    idx_ts_new = idx_ts_new[idx_ts_new >= low_lim]

    return idx_ts_new
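
A quick sanity check of remove_outliers on made-up timestamps (illustrative values only):

import numpy as np

ts = np.array([1.0, 1.1, 1.2, 1.3, 1.4, 9.9])
print(remove_outliers(ts))  # [1.  1.1 1.2 1.3 1.4] -- 9.9 lies above the upper fence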
Project: cellranger    Author: 10XGenomics
def _classify_gems(counts0, counts1):
        """ Infer number of distinct transcriptomes present in each GEM (1 or 2) and
            report cr_constants.GEM_CLASS_GENOME0 for a single cell w/ transcriptome 0,
            report cr_constants.GEM_CLASS_GENOME1 for a single cell w/ transcriptome 1,
            report cr_constants.GEM_CLASS_MULTIPLET for multiple transcriptomes """
        # Assumes that most of the GEMs are single-cell; model counts independently
        thresh0, thresh1 = [cr_constants.DEFAULT_MULTIPLET_THRESHOLD] * 2
        if sum(counts0 > counts1) >= 1 and sum(counts1 > counts0) >= 1:
            thresh0 = np.percentile(counts0[counts0 > counts1], cr_constants.MULTIPLET_PROB_THRESHOLD)
            thresh1 = np.percentile(counts1[counts1 > counts0], cr_constants.MULTIPLET_PROB_THRESHOLD)

        doublet = np.logical_and(counts0 >= thresh0, counts1 >= thresh1)
        dtype = np.dtype('|S%d' % max(len(cls) for cls in cr_constants.GEM_CLASSES))
        result = np.where(doublet, cr_constants.GEM_CLASS_MULTIPLET, cr_constants.GEM_CLASS_GENOME0).astype(dtype)
        result[np.logical_and(np.logical_not(result == cr_constants.GEM_CLASS_MULTIPLET), counts1 > counts0)] = cr_constants.GEM_CLASS_GENOME1

        return result
Project: cellranger    Author: 10XGenomics
def get_normalized_dispersion(mat_mean, mat_var, nbins=20):
    mat_disp = (mat_var - mat_mean) / np.square(mat_mean)

    quantiles = np.percentile(mat_mean, np.arange(0, 100, 100 / nbins))
    quantiles = np.append(quantiles, mat_mean.max())

    # merge bins with no difference in value
    quantiles = np.unique(quantiles)

    if len(quantiles) <= 1:
        # pathological case: the means are all identical. just return raw dispersion.
        return mat_disp

    # calc median dispersion per bin
    (disp_meds, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, mat_disp, statistic='median', bins=quantiles)

    # calc median absolute deviation of dispersion per bin
    disp_meds_arr = disp_meds[disp_bins-1] # 0th bin is empty since our quantiles start from 0
    disp_abs_dev = abs(mat_disp - disp_meds_arr)
    (disp_mads, _, disp_bins) = scipy.stats.binned_statistic(mat_mean, disp_abs_dev, statistic='median', bins=quantiles)

    # calculate normalized dispersion
    disp_mads_arr = disp_mads[disp_bins-1]
    disp_norm = (mat_disp - disp_meds_arr) / disp_mads_arr
    return disp_norm
Project: pycma    Author: CMA-ES
def prctile(data, p_vals=[0, 25, 50, 75, 100], sorted_=False):
        """``prctile(data, 50)`` returns the median, but p_vals can
        also be a sequence.

        For small samples or extreme percentiles this arguably gives
        better values than matplotlib.mlab.prctile or np.percentile,
        but it is also slower.

        """
        ps = [p_vals] if np.isscalar(p_vals) else p_vals

        if not sorted_:
            data = sorted(data)
        n = len(data)
        d = []
        for p in ps:
            fi = p * n / 100 - 0.5
            if fi <= 0:  # maybe extrapolate?
                d.append(data[0])
            elif fi >= n - 1:
                d.append(data[-1])
            else:
                i = int(fi)
                d.append((i + 1 - fi) * data[i] + (fi - i) * data[i + 1])
        return d[0] if np.isscalar(p_vals) else d
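
A small comparison sketch (assuming the prctile above is in scope): the fi = p * n / 100 - 0.5 plotting position differs from NumPy's default linear interpolation away from the median:

import numpy as np

data = [1, 2, 3, 4]
print(prctile(data, [25, 50, 75]))        # [1.5, 2.5, 3.5]
print(np.percentile(data, [25, 50, 75]))  # [1.75 2.5  3.25]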
Project: tsbitmaps    Author: binhmop
def fit_predict(self, ts):
        """
        Unsupervised training of TSBitMaps.

        :param ts: 1-D numpy array or pandas.Series 
        :return labels: `+1` for normal observations and `-1` for abnormal observations
        """
        assert self._lag_window_size > self._feature_window_size, 'lag_window_size must be > feature_window_size'

        self._ref_ts = ts
        scores = self._slide_chunks(ts)
        self._ref_bitmap_scores = scores

        thres = np.percentile(scores[self._lag_window_size: -self._lead_window_size + 1], self._q)

        labels = np.full(len(ts), 1)
        for idx, score in enumerate(scores):
            if score > thres:
                labels[idx] = -1

        return labels
Project: pyISC    Author: STREAM3
def fit(self, X, y=None):
        old_threshold = None
        threshold = None
        self.threshold_ = 0.0

        self._fit(X,y)

        count = 0
        while count < 100 and (old_threshold is None or abs(threshold - old_threshold) > 0.01):
            old_threshold = threshold
            ss = self.decision_function(X,y)
            threshold = percentile(ss, 100 * self.contamination)

            self._fit(X[ss > threshold],y[ss > threshold] if y is not None else None)

            count += 1

        self.threshold_ = threshold

        return self
Project: BlueWhale    Author: caffe2
def _update_quantile(self):
        states = np.array(self._quantile_states, dtype=np.float32)
        limited_action_values = self.action_values(states, self._limited_action)
        base_action_values = np.max(
            np.array(
                [
                    self.action_values(states, action)
                    for action in six.moves.range(self.num_actions)
                    if action != self._limited_action
                ]
            ),
            axis=0
        )
        target = np.percentile(
            limited_action_values - base_action_values, self._quantile
        )
        print("REWARD PENALTY TARGET:", target)
        self.quantile_value += self._quantile_update_rate * target
        print("QUANTILE:", self.quantile_value)
Project: dc_stat_think    Author: justinbois
def b_value(mags, mt, perc=[2.5, 97.5], n_reps=None):
    """Compute the b-value and optionally its confidence interval."""
    # Extract magnitudes above completeness threshold
    m = mags[mags >= mt]

    # Compute b-value
    b = (np.mean(m) - mt) * np.log(10)

    # Draw bootstrap replicates
    if n_reps is None:
        return b
    else:
        m_bs_reps = dcst.draw_bs_reps(m, np.mean, size=n_reps)

        # Compute b-value from replicates
        b_bs_reps = (m_bs_reps - mt) * np.log(10)

        # Compute confidence interval
        conf_int = np.percentile(b_bs_reps, perc)

        return b, conf_int
Project: histwords    Author: williamleif
def simple_slope_percentiles(res, df, target, varying, percs=[25, 50, 75]):
    exog = {}
    for param in res.fe_params.index:
        if len(param.split(":")) != 1:
            continue
        if param == "Intercept":
            exog[param] = 1.0
        else:
            exog[param] = np.median(df[param])
    ret_vals = collections.OrderedDict()
    for varying_perc in percs:
        exog[varying] = np.percentile(df[varying], varying_perc)
        ret_vals[exog[varying]] = collections.defaultdict(list)
        for target_perc in [25, 75]:
            exog[target] = np.percentile(df[target], target_perc)
            exog_arr = np.array([exog[param] if len(param.split(":")) == 1 else exog[param.split(":")[0]] * exog[param.split(":")[1]]
                for param in res.fe_params.index])
            ret_vals[exog[varying]]["endog"].append(res.model.predict(res.fe_params, exog=exog_arr))
            ret_vals[exog[varying]]["target"].append(exog[target])
    return ret_vals
Project: histwords    Author: williamleif
def simple_slope_categories(res, df, target, cat, cats):
    exog = {}
    for param in res.fe_params.index:
        if len(param.split(":")) != 1:
            continue
        if param == "Intercept":
            exog[param] = 1.0
        elif param in cats:
            exog[param] = 0
        else:
            exog[param] = np.mean(df[param])
    if cat is not None:
        exog[cat] = 1
    x_points = []
    y_points = []
    for target_perc in [10, 90]:
        exog[target] = np.percentile(df[target], target_perc)
#        exog[target] = target_perc
        exog_arr = np.array([exog[param] if len(param.split(":")) == 1 else exog[param.split(":")[0]] * exog[param.split(":")[1]]
            for param in res.fe_params.index])
        y_points.append(res.model.predict(res.fe_params, exog=exog_arr))
        x_points.append(exog[target])
    return x_points, y_points
Project: empyrical    Author: quantopian
def test_value_at_risk(self):
        value_at_risk = self.empyrical.value_at_risk

        returns = [1.0, 2.0]
        assert_almost_equal(value_at_risk(returns, cutoff=0.0), 1.0)
        assert_almost_equal(value_at_risk(returns, cutoff=0.3), 1.3)
        assert_almost_equal(value_at_risk(returns, cutoff=1.0), 2.0)

        returns = [1, 81, 82, 83, 84, 85]
        assert_almost_equal(value_at_risk(returns, cutoff=0.1), 41)
        assert_almost_equal(value_at_risk(returns, cutoff=0.2), 81)
        assert_almost_equal(value_at_risk(returns, cutoff=0.3), 81.5)

        # Test a returns stream of 21 data points at different cutoffs.
        returns = np.random.normal(0, 0.02, 21)
        for cutoff in (0, 0.0499, 0.05, 0.20, 0.999, 1):
            assert_almost_equal(
                value_at_risk(returns, cutoff),
                np.percentile(returns, cutoff * 100),
            )
Project: empyrical    Author: quantopian
def value_at_risk(returns, cutoff=0.05):
    """
    Value at risk (VaR) of a returns stream.

    Parameters
    ----------
    returns : pandas.Series or 1-D numpy.array
        Non-cumulative daily returns.
    cutoff : float, optional
        Decimal representing the percentage cutoff for the bottom percentile of
        returns. Defaults to 0.05.

    Returns
    -------
    VaR : float
        The VaR value.
    """
    return np.percentile(returns, 100 * cutoff)
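
A short usage sketch with made-up returns (illustrative only):

import numpy as np

returns = np.array([-0.04, -0.02, -0.01, 0.00, 0.01, 0.02, 0.03])
var_5 = value_at_risk(returns, cutoff=0.05)
# Identical to np.percentile(returns, 5): roughly 5% of daily returns
# are expected to fall below var_5.
assert np.isclose(var_5, np.percentile(returns, 5))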
Project: website-fingerprinting    Author: AxelGoetz
def get_packets_per_second(trace, features):
    """
    Gets the total number of packets per second along with mean, standard deviation, min, max and median

    @return a 1D list that contains the packets per second
    """
    packets_per_second = {}
    for val in trace:
        second = floor(val[0])

        if second not in packets_per_second:
            packets_per_second[second] = 0

        packets_per_second[second] += 1

    l = list(packets_per_second.values())

    features.append(sum(l) / len(l))
    features.append(np.std(l))
    features.append(np.percentile(l, 50))
    features.append(min(l))
    features.append(max(l))

    return l
Project: website-fingerprinting    Author: AxelGoetz
def get_inter_arrival_time(trace, in_trace, out_trace, features):
    """
    For the complete trace, the trace containing only incoming packets, and the trace containing only outgoing packets,
    we calculate the inter-arrival time.
    Next, we add the max, mean, standard deviation and third quartile as features.

    @param in_trace contains all the incoming packets and nothing else
    @param out_trace contains all the outgoing packets and nothing else
    """
    complete_inter_arrival_time = inter_packet_time(trace)

    in_inter_arrival_time = inter_packet_time(in_trace)
    out_inter_arrival_time = inter_packet_time(out_trace)

    inter_arrival_times = [complete_inter_arrival_time, in_inter_arrival_time, out_inter_arrival_time]

    for time in inter_arrival_times:
        if len(time) == 0:
            features.extend([0, 0, 0, 0])

        else:
            features.append(max(time))
            features.append(sum(time) / len(time))
            features.append(np.std(time))
            features.append(np.percentile(time, 75))
Project: website-fingerprinting    Author: AxelGoetz
def get_transmission_time_stats(trace, in_trace, out_trace, features):
    """
    For the complete trace and the traces containing only incoming and only outgoing packets, we extract the following:
    - First, second and third quartile
    - Total transmission time
    """
    in_times    = [x[0] for x in in_trace]
    out_times   = [x[0] for x in out_trace]
    total_times = [x[0] for x in trace]

    times = [total_times, in_times, out_times]

    for time in times:
        if len(time) == 0:
            features.extend([0, 0, 0, 0])

        else:
            features.append(np.percentile(time, 25))
            features.append(np.percentile(time, 50))
            features.append(np.percentile(time, 75))
            features.append(np.percentile(time, 100))
Project: XTREE    Author: ai-se
def deltasCSVWriter(self, name='ant'):
    "Changes"
    header = array([h.name[1:] for h in self.test.headers[:-2]])
    oldRows = [r for r, p in zip(self.test._rows, self.pred) if p > 0]
    delta = array([self.delta(t) for t in oldRows])
    y = median(delta, axis=0)
    yhi, ylo = percentile(delta, q=[75, 25], axis=0)
    dat1 = sorted(
        [(h, a, b, c) for h, a, b, c in zip(header, y, ylo, yhi)], key=lambda F: F[1])
    dat = asarray([(d[0], n, d[1], d[2], d[3])
                   for d, n in zip(dat1, range(1, 21))])
    with open('/Users/rkrsn/git/GNU-Plots/rkrsn/errorbar/%s.csv' % (name), 'w') as csvfile:
      writer = csv.writer(csvfile, delimiter=' ')
      for el in dat[()]:
        writer.writerow(el)
    # new = [self.newRow(t) for t in oldRows]
Project: radar    Author: amoose136
def _nanpercentile(a, q, axis=None, out=None, overwrite_input=False,
                   interpolation='linear', keepdims=False):
    """
    Private function that doesn't support extended axis or keepdims.
    These methods are extended to this function using _ureduce
    See nanpercentile for parameter usage

    """
    if axis is None:
        part = a.ravel()
        result = _nanpercentile1d(part, q, overwrite_input, interpolation)
    else:
        result = np.apply_along_axis(_nanpercentile1d, axis, a, q,
                                     overwrite_input, interpolation)
        # apply_along_axis fills in collapsed axis with results.
        # Move that axis to the beginning to match percentile's
        # convention.
        if q.ndim != 0:
            result = np.rollaxis(result, axis)   

    if out is not None:
        out[...] = result
    return result
Project: radar    Author: amoose136
def test_out(self):
        mat = np.random.rand(3, 3)
        nan_mat = np.insert(mat, [0, 2], np.nan, axis=1)
        resout = np.zeros(3)
        tgt = np.percentile(mat, 42, axis=1)
        res = np.nanpercentile(nan_mat, 42, axis=1, out=resout)
        assert_almost_equal(res, resout)
        assert_almost_equal(res, tgt)
        # 0-d output:
        resout = np.zeros(())
        tgt = np.percentile(mat, 42, axis=None)
        res = np.nanpercentile(nan_mat, 42, axis=None, out=resout)
        assert_almost_equal(res, resout)
        assert_almost_equal(res, tgt)
        res = np.nanpercentile(nan_mat, 42, axis=(0, 1), out=resout)
        assert_almost_equal(res, resout)
        assert_almost_equal(res, tgt)
Project: radar    Author: amoose136
def test_multiple_percentiles(self):
        perc = [50, 100]
        mat = np.ones((4, 3))
        nan_mat = np.nan * mat
        # For checking consistency in higher dimensional case
        large_mat = np.ones((3, 4, 5))
        large_mat[:, 0:2:4, :] = 0
        large_mat[:, :, 3:] *= 2
        for axis in [None, 0, 1]:
            for keepdim in [False, True]:
                with warnings.catch_warnings(record=True) as w:
                    warnings.simplefilter('always')
                    val = np.percentile(mat, perc, axis=axis, keepdims=keepdim)
                    nan_val = np.nanpercentile(nan_mat, perc, axis=axis,
                                               keepdims=keepdim)
                    assert_equal(nan_val.shape, val.shape)

                    val = np.percentile(large_mat, perc, axis=axis,
                                        keepdims=keepdim)
                    nan_val = np.nanpercentile(large_mat, perc, axis=axis,
                                               keepdims=keepdim)
                    assert_equal(nan_val, val)

        megamat = np.ones((3, 4, 5, 6))
        assert_equal(np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6))
Project: radar    Author: amoose136
def test_keepdims(self):
        d = np.ones((3, 5, 7, 11))
        assert_equal(np.percentile(d, 7, axis=None, keepdims=True).shape,
                     (1, 1, 1, 1))
        assert_equal(np.percentile(d, 7, axis=(0, 1), keepdims=True).shape,
                     (1, 1, 7, 11))
        assert_equal(np.percentile(d, 7, axis=(0, 3), keepdims=True).shape,
                     (1, 5, 7, 1))
        assert_equal(np.percentile(d, 7, axis=(1,), keepdims=True).shape,
                     (3, 1, 7, 11))
        assert_equal(np.percentile(d, 7, (0, 1, 2, 3), keepdims=True).shape,
                     (1, 1, 1, 1))
        assert_equal(np.percentile(d, 7, axis=(0, 1, 3), keepdims=True).shape,
                     (1, 1, 7, 1))

        assert_equal(np.percentile(d, [1, 7], axis=(0, 1, 3),
                                   keepdims=True).shape, (2, 1, 1, 7, 1))
        assert_equal(np.percentile(d, [1, 7], axis=(0, 3),
                                   keepdims=True).shape, (2, 1, 5, 7, 1))
Project: radar    Author: amoose136
def test_out_nan(self):
        with warnings.catch_warnings(record=True):
            warnings.filterwarnings('always', '', RuntimeWarning)
            o = np.zeros((4,))
            d = np.ones((3, 4))
            d[2, 1] = np.nan
            assert_equal(np.percentile(d, 0, 0, out=o), o)
            assert_equal(
                np.percentile(d, 0, 0, interpolation='nearest', out=o), o)
            o = np.zeros((3,))
            assert_equal(np.percentile(d, 1, 1, out=o), o)
            assert_equal(
                np.percentile(d, 1, 1, interpolation='nearest', out=o), o)
            o = np.zeros(())
            assert_equal(np.percentile(d, 1, out=o), o)
            assert_equal(
                np.percentile(d, 1, interpolation='nearest', out=o), o)
Project: planetplanet    Author: rodluger
def quantile(x, q, weights=None):
    """
    Like numpy.percentile, but:

    * Values of q are quantiles [0., 1.] rather than percentiles [0., 100.]
    * scalar q not supported (q must be iterable)
    * optional weights on x

    """
    if weights is None:
        return np.percentile(x, [100. * qi for qi in q])
    else:
        idx = np.argsort(x)
        xsorted = x[idx]
        cdf = np.add.accumulate(weights[idx])
        cdf /= cdf[-1]
        return np.interp(q, cdf, xsorted).tolist()
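
A usage sketch (assuming the quantile above is in scope); note q is a fraction in [0, 1] here, and weights shift the interpolated CDF:

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
w = np.array([1.0, 1.0, 1.0, 5.0])
print(quantile(x, [0.25, 0.5, 0.75]))             # [1.75 2.5  3.25] -- plain np.percentile
print(quantile(x, [0.25, 0.5, 0.75], weights=w))  # ~ [2.0, 3.2, 3.6] -- pulled toward the heavily weighted 4.0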
Project: rdtools    Author: NREL
def _degradation_CI(results):
    '''
    Description
    -----------
    Monte Carlo estimation of uncertainty in degradation rate from OLS results

    Parameters
    ----------
    results: OLSResults object from fitting a model of the form:
    results = sm.OLS(endog = df.energy_ma, exog = df.loc[:,['const','years']]).fit()
    Returns
    -------
    68.2% confidence interval for degradation rate

    '''

    sampled_normal = np.random.multivariate_normal(results.params, results.cov_params(), 10000)
    dist = sampled_normal[:, 1] / sampled_normal[:, 0]
    Rd_CI = np.percentile(dist, [50 - 34.1, 50 + 34.1]) * 100.0
    return Rd_CI
Project: bmcmc    Author: sanjibs
def info(self,burn=1000,plot=False):
        """
        Print the summary statistics and optionally plot the results
        """
        rows=len(self.varnames)
        cols=2
        chain=np.array(self.chain[burn:])
        nsize=chain.shape[0]
#        print rows,cols
        print('%4s %16s %12s %12s [%12s, %12s, %12s]' % ('no', 'name', 'mean', 'stddev', '16%', '50%', '84%'))
        for i,name in enumerate(self.varnames):
            temp=np.percentile(chain[:,i],[16.0,84.0,50.0])
            print('%4i %16s %12g %12g [%12g, %12g, %12g]' % (i, name, np.mean(chain[:,i]), (temp[1]-temp[0])/2.0, temp[0], temp[2], temp[1]))
            if plot:
                ax=plt.subplot(rows,cols,2*i+1) 
#                plt.text(0.05,0.9,r'$\tau$='+'%5.1f'%(acor.acor(chain[:,i])[0]),transform=ax.transAxes)
                plt.plot(chain[:,i])
                plt.ylabel(self.model.descr[name][3])
                plt.xlabel('Iteration')
                ax=plt.subplot(rows,cols,2*i+2) 
                plt.hist(chain[:,i],bins=100,histtype='step')
                plt.text(0.05,0.9,sround(np.mean(chain[:,i]),temp[0],temp[1]),transform=ax.transAxes)
                plt.xlabel(self.model.descr[name][3])
                # plt.text(0.05,0.9,'%6g %3g (%4g-%4g)'%(np.mean(chain[:,i]),(temp[1]-temp[0])/2.0,temp[0],temp[1]),transform=ax.transAxes)
Project: rl_trading    Author: ucaiado
def filterout_outliers(l_data, l_date):
    '''
    Return a list with data filtered from outliers

    :param l_data: list. data to be analyzed
    :param l_date: list. timestamps (in seconds) aligned with l_data
    '''
    # === old code to filter outliers =======
    # Q3 = np.percentile(l_data, 98)
    # Q1 = np.percentile(l_data, 2)
    # step = (Q3 - Q1) * 1.5
    # step = max(3000., step)
    # na_val = np.array(l_data)
    # na_val = na_val[(na_val >= Q1 - step) & (na_val <= Q3 + step)]
    # return na_val
    # =======================================
    # group by minute
    df_filter = pd.Series(np.array(l_date)/60).astype(int)
    l_filter = list((df_filter != df_filter.shift()).values)
    l_filter[0] = True
    l_filter[-1] = True
    return np.array(pd.Series(l_data)[l_filter].values)
Project: facade-segmentation    Author: jfemiani
def _cut_windows_vertically(self, door_top, roof_top, sky_sig, win_strip):
        win_sig = np.percentile(win_strip, 85, axis=1)
        win_sig[sky_sig > 0.5] = 0
        if win_sig.max() > 0:
            win_sig /= win_sig.max()
        win_sig[:roof_top] = 0
        win_sig[door_top:] = 0
        runs, starts, values = run_length_encode(win_sig > 0.5)
        win_heights = runs[values]
        win_tops = starts[values]
        if len(win_heights) > 0:
            win_bottom = win_tops[-1] + win_heights[-1]
            win_top = win_tops[0]
            win_vertical_spacing = np.diff(win_tops).mean() if len(win_tops) > 1 else 0
        else:
            win_bottom = win_top = win_vertical_spacing = -1

        self.top = int(win_top)
        self.bottom = int(win_bottom)
        self.vertical_spacing = int(win_vertical_spacing)
        self.vertical_scores = make_list(win_sig)
        self.heights = np.array(win_heights)
        self.tops = np.array(win_tops)
Project: expan    Author: zalando
def _first_interval(x, n_bins):
    """
    Gets the first interval based on the percentiles, 
    either a closed interval containing the same value multiple times
    or a closed-open interval with a different lower and upper bound.
    """
    # calculate the percentiles
    percentiles = np.linspace(0., 100., n_bins + 1)
    bounds = np.percentile(x, q=percentiles, interpolation='higher')
    lower = bounds[0]
    upper = bounds[1]

    if lower == upper:
        return lower, upper, True, True
    else:
        return lower, upper, True, False
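
Two illustrative calls showing both return shapes of _first_interval: repeated values can make the first two percentile bounds coincide (a closed interval); otherwise the lower bound is closed and the upper open:

import numpy as np

lo, up, lc, uc = _first_interval(np.array([1., 1., 1., 2., 3., 4.]), n_bins=3)
print(lo, up, lc, uc)  # 1.0 1.0 True True -- degenerate, closed on both sides
lo, up, lc, uc = _first_interval(np.array([1., 2., 3., 4., 5., 6.]), n_bins=3)
print(lo, up, lc, uc)  # 1.0 3.0 True False -- closed-open interval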


#------- private methods for categorical binnings-------#
Project: CS-SMAF    Author: brian-cleary
def random_submatrix(Z,g,s,o,threshpct=99,returnIdx=False):
    xg = np.zeros(Z.shape[0],dtype=np.bool)
    xg[:g] = True
    np.random.shuffle(xg)
    xt = np.zeros(Z.shape[1],dtype=np.bool)
    xt[:s] = True
    np.random.shuffle(xt)
    X0 = Z[xg][:,xt]
    thresh = np.percentile(X0,threshpct)
    X0[(X0 > thresh)] = thresh
    xo = np.zeros(X0.shape[0],dtype=np.bool)
    xo[:o] = True
    np.random.shuffle(xo)
    Xobs = X0[xo]
    if returnIdx:
        return X0,xo,Xobs,xt,xg
    else:
        return X0,xo,Xobs
Project: hand_eye_calibration    Author: ethz-asl
def filter_and_smooth_angular_velocity(angular_velocity,
                                       low_pass_kernel_size, clip_percentile, plot=False):
  """Reduce the noise in a velocity signal."""

  max_value = np.percentile(angular_velocity, clip_percentile)
  print("Clipping angular velocity norms to {} rad/s ...".format(max_value))
  angular_velocity_clipped = np.clip(angular_velocity, -max_value, max_value)
  print("Done clipping angular velocity norms...")

  low_pass_kernel = np.ones((low_pass_kernel_size, 1)) / low_pass_kernel_size
  print("Smoothing with kernel size {} samples...".format(low_pass_kernel_size))

  angular_velocity_smoothed = signal.correlate(angular_velocity_clipped,
                                               low_pass_kernel, 'same')

  print("Done smoothing angular velocity norms...")

  if plot:
    plot_angular_velocities("Angular Velocities", angular_velocity,
                            angular_velocity_smoothed, True)

  return angular_velocity_smoothed.copy()
Project: pyflux    Author: RJT1990
def norm_post_sim(modes,cov_matrix):
    post = multivariate_normal(modes,cov_matrix)
    nsims = 30000
    phi = np.zeros([nsims,len(modes)])

    for i in range(0,nsims):
         phi[i] = post.rvs()

    chain = np.array([phi[i][0] for i in range(len(phi))])
    for m in range(1,len(modes)):
        chain = np.vstack((chain,[phi[i][m] for i in range(len(phi))]))

    mean_est = [np.mean(np.array([phi[i][j] for i in range(len(phi))])) for j in range(len(modes))]
    median_est = [np.median(np.array([phi[i][j] for i in range(len(phi))])) for j in range(len(modes))]
    upper_95_est = [np.percentile(np.array([phi[i][j] for i in range(len(phi))]),95) for j in range(len(modes))]
    lower_95_est = [np.percentile(np.array([phi[i][j] for i in range(len(phi))]),5) for j in range(len(modes))]

    return chain, mean_est, median_est, upper_95_est, lower_95_est
Project: tbp-next-basket    Author: GiulioRossetti
def calculate_aggregate(values):
    agg_measures = {
        'avg': np.mean(values),
        'std': np.std(values),
        'var': np.var(values),
        'med': np.median(values),
        '10p': np.percentile(values, 10),
        '25p': np.percentile(values, 25),
        '50p': np.percentile(values, 50),
        '75p': np.percentile(values, 75),
        '90p': np.percentile(values, 90),
        'iqr': np.percentile(values, 75) - np.percentile(values, 25),
        'iqm': interquartile_range_mean(values),
        'mad': mean_absolute_deviation(values),
        'cov': 1.0 * np.mean(values) / np.std(values),
        'gin': gini_coefficient(values),
        'skw': stats.skew(values),
        'kur': stats.kurtosis(values),
        'sum': np.sum(values)
    }

    return agg_measures
Project: lorelei-speech-evaluation    Author: usc-sail
def get_pr(reference_frames,output_frames,mode='type',pr_resolution=100):

    # filter output by confidence
    confidence = collect_confidence(output_frames)
    conf_order = []
    step = 100/float(pr_resolution)
    for j in range(1,pr_resolution+1):
        conf_order.append( np.percentile(confidence, j*step) )
    conf_order = [-1] + conf_order + [2]

    # get curve
    params = []
    for threshold in conf_order:
        params.append( [ reference_frames, output_frames, confidence, threshold, mode ] )
    all_tp, all_fp, all_fn, all_prec, all_rec = zip(*pool.map(single_point, params))
    all_prec = list(all_prec) #+ [0]
    all_rec  = list(all_rec) #+ [1]

    all_rec, all_prec = zip(*sorted(zip(all_rec, all_prec)))
    AUC = metrics.auc(all_rec, all_prec)

    return all_rec, all_prec, AUC


# create complete output for 1 mode --------------------------------------------
Project: IntelliChess    Author: lifei96
def results_stat():
    for file in os.listdir('./results'):
        if file.endswith('.txt'):
            file_path = os.path.join('./results', file)
            data = [[], [], [], []]
            with open(file_path, 'r') as f:
                lines = f.read().split('\n')
                for line in lines:
                    if len(line) > 0:
                        line_data = [float(v) for v in line.split()]
                        for i in range(4):
                            data[i].append(line_data[i])
            lines = [[], [], [], []]
            for i in range(4):
                lines[i].append(np.mean(data[i]))
                lines[i].append(np.std(data[i]))
                lines[i].append(np.min(data[i]))
                lines[i].append(np.percentile(data[i], 25))
                lines[i].append(np.percentile(data[i], 50))
                lines[i].append(np.percentile(data[i], 75))
                lines[i].append(np.max(data[i]))
            output_path = os.path.join('./stat', file)
            with open(output_path, 'w') as f:
                for line in lines:
                    f.write('\t'.join(map(str, line)) + '\n')
Project: kharita    Author: vipyoung
def splitclusters(datapointwts,seeds,theta):
    std = {}; seeds1 = []; seedweight = [];
    cluster, p2cluster = point2cluster(datapointwts, seeds,theta);
    for cl in cluster:
        mang = seeds[cl][-1];
        if len(cluster[cl]) > 10:
            std[cl] = np.percentile([angledist(xx[2], mang) for xx in cluster[cl]], 90)
            clockwise = [xx for xx in cluster[cl] if greaterthanangle(xx[2], mang)];
            if std[cl]>20 and len(clockwise)>0 and len(clockwise)<len(cluster[cl]):
                seeds1.append(avgpoint(clockwise))
                seeds1.append(avgpoint([xx for xx in cluster[cl] if not greaterthanangle(xx[2], mang)]))
                seedweight.append(len(clockwise))
                seedweight.append(len(cluster[cl]) -len(clockwise))
            else:
                seeds1.append(seeds[cl]); seedweight.append(len(cluster[cl]))
        else:
            seeds1.append(seeds[cl]); seedweight.append(len(cluster[cl]))
    return seeds1, seedweight
Project: kharita    Author: vipyoung
def splitclustersparallel(datapointwts,seeds):
    X = [(xx[0], xx[1]) for xx in datapointwts]
    S = [(xx[0], xx[1]) for xx in seeds]
    cluster = {}; p2cluster = []; gedges = {}; gedges1 = {}; nedges = {}
    std = {}; seeds1 = []; seedweight = []; roadwidth = []
    nbrs = NearestNeighbors(n_neighbors=20, algorithm='ball_tree').fit(S)
    distances, indices = nbrs.kneighbors(X)
    for cd in range(len(seeds)):
        cluster[cd] = []; roadwidth.append(0);
    for ii, ll in enumerate(indices):
        dd = [taxidist(seeds[xx], datapointwts[ii][:-1],theta) for xx in ll]
        cd = ll[dd.index(min(dd))];
        cluster[cd].append(datapointwts[ii])
        p2cluster.append(cd)
    for cl in cluster:
        mang = seeds[cl][-1];
        scl = seeds[cl]
        if len(cluster[cl]) > 10:
            std[cl] = np.percentile([angledist(xx[2], mang) for xx in cluster[cl]], 90)
            roadwidth[cl] = 1+5*np.std([geodist(scl,xx)*np.sin(anglebetweentwopoints(scl,xx)-scl[-1])  for xx in cluster[cl]])
            print(cl,scl,[(anglebetweentwopoints(scl,xx),scl[-1])  for xx in cluster[cl]])
Project: pbtranscript    Author: PacificBiosciences
def _write_config(self, fasta_filename):
        """Write daligner sensitive config to fasta_filename.sensitive.config."""
        lens = [len(r.sequence) for r in ContigSetReaderWrapper(fasta_filename)]
        self.low_cDNA_size, self.high_cDNA_size = 0, 0
        if len(lens) == 1:
            self.low_cDNA_size, self.high_cDNA_size = lens[0], lens[0]
        if len(lens) >= 2:
            self.low_cDNA_size  = int(np.percentile(lens, 10))
            self.high_cDNA_size = int(np.percentile(lens, 90))

        try:
            with open(fasta_filename+'.sensitive.config', 'w') as f:
                f.write("sensitive={s}\n".format(s=self.sensitive_mode))
                f.write("low={l}\n".format(l=self.low_cDNA_size))
                f.write("high={h}\n".format(h=self.high_cDNA_size))
        except IOError:
            pass # it's OK not to have write permission
Project: chainladder-python    Author: jbogaardt
def summary(self):
        """ Method to produce a summary table of of the Mack Chainladder 
        model.

        Returns:
            This calculation is consistent with the R calculation 
            BootChainLadder$summary

        """
        IBNR = self.IBNR
        tri = self.tri
        summary = pd.DataFrame()
        summary['Latest'] = tri.get_latest_diagonal()
        summary['Mean Ultimate'] = summary['Latest'] + pd.Series([np.mean(np.array(IBNR)[:,num]) for num in range(len(tri.data))],index=summary.index)
        summary['Mean IBNR'] = summary['Mean Ultimate'] - summary['Latest'] 
        summary['SD IBNR'] = pd.Series([np.std(np.array(IBNR)[:,num]) for num in range(len(tri.data))],index=summary.index)
        summary['IBNR 75%'] = pd.Series([np.percentile(np.array(IBNR)[:,num],q=75) for num in range(len(tri.data))],index=summary.index)
        summary['IBNR 95%'] = pd.Series([np.percentile(np.array(IBNR)[:,num],q=95) for num in range(len(tri.data))],index=summary.index)
        return summary
Project: tools    Author: kastnerkyle
def run_ltsd_example():
    fs, d = fetch_sample_speech_tapestry()
    winsize = 1024
    d = d.astype("float32") / 2 ** 15
    d -= d.mean()

    pad = 3 * fs
    noise_pwr = np.percentile(d, 1) ** 2
    noise_pwr = max(1E-9, noise_pwr)
    d = np.concatenate((np.zeros((pad,)) + noise_pwr * np.random.randn(pad), d))
    _, vad_segments = ltsd_vad(d, fs, winsize=winsize)
    v_up = np.where(vad_segments == True)[0]
    s = v_up[0]
    st = v_up[-1] + int(.5 * fs)
    d = d[s:st]

    bname = "tapestry.wav".split(".")[0]
    wavfile.write("%s_out.wav" % bname, fs, soundsc(d))
Project: adagan    Author: tolstikhin
def _compute_data_weights_topk(self, opts, density_ratios):
        """Put a uniform distribution on K points with largest prob real data.

        This is a naive heuristic which makes the next GAN concentrate on
        those points of the training set that were classified correctly
        with the largest margins. I.e., our current mixture model is not
        capable of generating points that look similar to these ones.
        """
        threshold = np.percentile(density_ratios,
                                  opts["topk_constant"]*100.0)
        # Note that largest prob_real_data corresponds to smallest density
        # ratios.
        mask = density_ratios <= threshold
        data_weights = np.zeros(self._data_num)
        data_weights[mask] = 1.0 / np.sum(mask)
        return data_weights
Project: physt    Author: janpipek
def quantile_binning(data=None, bins=10, qrange=(0.0, 1.0), **kwargs):
    """Binning schema based on quantile ranges.

    This binning finds equally spaced quantiles. This should lead to
    all bins having roughly the same frequencies.

    Note: weights are not (yet) taken into account for calculating
    quantiles.

    Parameters
    ----------
    bins: sequence or Optional[int]
        Number of bins
    qrange: Optional[tuple]
        Two floats as minimum and maximum quantile (default: 0.0, 1.0)

    Returns
    -------
    StaticBinning
    """

    if np.isscalar(bins):
        bins = np.linspace(qrange[0] * 100, qrange[1] * 100, bins + 1)
    bins = np.percentile(data, bins)
    return static_binning(bins=make_bin_array(bins), includes_right_edge=True)
Project: BayesVP    Author: cameronliang
def estimate_bayes_factor(traces, logp, r=0.05):
    """
    Estimate odds ratios in a random subsample of the chains in MCMC.
    AstroML (see Eqn 5.127, pg 237)
    """

    ndim, nsteps = traces.shape # [ndim,number of steps in chain]

    # compute volume of a n-dimensional (ndim) sphere of radius r
    Vr = np.pi ** (0.5 * ndim) / gamma(0.5 * ndim + 1) * (r ** ndim)

    # use neighbor count within r as a density estimator
    bt = BallTree(traces.T)
    count = bt.query_radius(traces.T, r=r, count_only=True)

    # BF = N*p/rho
    bf = logp + np.log(nsteps) + np.log(Vr) - np.log(count) #log10(bf)

    p25, p50, p75 = np.percentile(bf, [25, 50, 75])
    return p50, 0.7413 * (p75 - p25)

########################################################################
Project: azmq    Author: ereOn
def reducer(self, key, values):
        if key == 'latencies':
            values = list(values)
            yield 'average_latency', numpy.average(values)
            yield 'median_latency', numpy.median(values)
            yield '95th_percentile_latency', numpy.percentile(values, 95)
        elif key == 'sent':
            first, last, count = minmax(values)
            yield 'count', count
            yield 'sending_first', first
            yield 'sending_last', last
            yield 'sending_overhead', (last - first) / (count - 1)
            yield 'sending_throughput', (count - 1) / (last - first)
        elif key == 'received':
            first, last, count = minmax(values)
            yield 'receiving_first', first
            yield 'receiving_last', last
            yield 'receiving_overhead', (last - first) / (count - 1)
            yield 'receiving_throughput', (count - 1) / (last - first)
Project: DeepMoji    Author: bfelbo
def calculate_batchsize_maxlen(texts):
    """ Calculates the maximum length in the provided texts and a suitable
        batch size. Rounds up maxlen to the nearest multiple of ten.

    # Arguments:
        texts: List of inputs.

    # Returns:
        Batch size,
        max length
    """
    def roundup(x):
        return int(math.ceil(x / 10.0)) * 10

    # Calculate max length of sequences considered
    # Adjust batch_size accordingly to prevent GPU overflow
    lengths = [len(tokenize(t)) for t in texts]
    maxlen = roundup(np.percentile(lengths, 80.0))
    batch_size = 250 if maxlen <= 100 else 50
    return batch_size, maxlen
Project: elfi    Author: elfi-dev
def ss_octile(y):
    """Obtain the octile summary statistic.

    The statistic reaches the optimal performance upon a high number of
    observations. According to Allingham et al. (2009), it is more stable than ss_robust.

    Parameters
    ----------
    y : array_like
        Yielded points.

    Returns
    -------
    array_like of the shape (batch_size, dim_ss=8, dim_ss_point)

    """
    octiles = np.linspace(12.5, 87.5, 7)
    E1, E2, E3, E4, E5, E6, E7 = np.percentile(y, octiles, axis=1)

    # Combining the summary statistics.
    ss_octile = np.hstack((E1, E2, E3, E4, E5, E6, E7))
    ss_octile = ss_octile[:, :, np.newaxis]
    return ss_octile
Project: py-hadoop-tutorial    Author: hougs
def flag_outliers(series, iqr_multiplier=1.5):
    """Use Tukey's boxplot criterion for outlier identification.
    """
    top_quartile_cutoff = np.percentile(series.values, 75)
    bottom_quartile_cutoff = np.percentile(series.values, 25)
    # Compute interquartile range
    iqr = top_quartile_cutoff - bottom_quartile_cutoff
    top_outlier_cutoff = top_quartile_cutoff + iqr * iqr_multiplier
    bottom_outlier_cutoff = bottom_quartile_cutoff - iqr * iqr_multiplier
    return series[(series < bottom_outlier_cutoff) | (series > top_outlier_cutoff)]
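
A hypothetical call on a small Series:

import numpy as np
import pandas as pd

s = pd.Series([1, 2, 3, 4, 100])
print(flag_outliers(s))  # flags the 100: upper cutoff is Q3 + 1.5*IQR = 4 + 3 = 7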


Project: retinex_for_mri    Author: ofgulban
def truncate_and_scale(data, percMin=0.01, percMax=99.9, zeroTo=1.0):
    """Truncate and scale the data as a preprocessing step.

    Parameters
    ----------
    data : nd numpy array
        Data/image to be truncated and scaled.
    percMin : float, positive
        Minimum percentile to be truncated.
    percMax : float, positive
        Maximum percentile to be truncated.
    zeroTo : float
        Data will be returned in the range from 0 to this number.

    Returns
    -------
    data : nd numpy array
        Truncated and scaled data/image.

    """
    # adjust minimum
    percDataMin = np.percentile(data, percMin)
    data[np.where(data < percDataMin)] = percDataMin
    data = data - data.min()

    # adjust maximum
    percDataMax = np.percentile(data, percMax)
    data[np.where(data > percDataMax)] = percDataMax
    data = 1./data.max() * data
    return data * zeroTo
Project: CASIA-HWDB1.1-cnn    Author: integeruser
def preprocess_bitmap(bitmap):
    # contrast stretching
    p2, p98 = np.percentile(bitmap, (2, 98))
    assert abs(p2-p98) > 10
    bitmap = skimage.exposure.rescale_intensity(bitmap, in_range=(p2, p98))

    # from skimage.filters import threshold_otsu
    # thresh = threshold_otsu(bitmap)
    # bitmap = bitmap > thresh
    return bitmap
Project: cellranger    Author: 10XGenomics
def plot_tsne_totalcounts(chart, sample_properties, sample_data):
    """ Plot cells colored by total counts """
    analysis = sample_data.analysis
    if not analysis or len(sample_properties['genomes']) > 1:
        return None

    reads_per_bc = analysis.matrix.get_reads_per_bc()
    vmin, vmax = np.percentile(reads_per_bc, ws_gex_constants.TSNE_TOTALCOUNTS_PRCT_CLIP)

    return plot_dimensions_color(chart, analysis.get_tsne().transformed_tsne_matrix,
                                 reads_per_bc,
                                 ws_gex_constants.TSNE_TOTALCOUNTS_DESCRIPTION,
                                 vmin, vmax,
                                 1, 2)