Python numpy module: digitize() example source code

We extracted the following 49 code examples from open-source Python projects to illustrate how to use numpy.digitize().
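As a quick orientation before the project examples: numpy.digitize(x, bins) returns, for each element of x, the index of the bin it falls into given monotonically increasing bin edges (0 means below the first edge). A minimal sketch, with edge and input values chosen purely for illustration:

import numpy as np

x = np.array([0.2, 6.4, 3.0, 1.6, 12.0])
bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])

# With right=False (the default), bins[i-1] <= x < bins[i] maps to index i;
# values below the first edge map to 0, values at or above the last edge to len(bins).
inds = np.digitize(x, bins)
print(inds)        # [1 4 3 2 5]

# Many of the examples below subtract 1 to get 0-based indices into arrays
# of bin centers or counts.
print(inds - 1)    # [0 3 2 1 4]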

Project: tsbitmaps    Author: binhmop
def discretize(self, ts, bins=None, global_min=None, global_max=None):
        if bins is None:
            bins = self._bins

        if np.isscalar(bins):
            num_bins = bins

            min_value = ts.min()
            max_value = ts.max()
            if min_value == max_value:
                min_value = global_min
                max_value = global_max
            step = (max_value - min_value) / num_bins
            ts_bins = np.arange(min_value, max_value, step)
        else:
            ts_bins = bins

        inds = np.digitize(ts, ts_bins)
        binned_ts = tuple(str(i - 1) for i in inds)
        return binned_ts
Project: rain-metrics-python    Author: apendergrass
def makedists(pdata,binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    pds=pdata.shape;    nlat=pds[1];    nlon=pds[0];    nd=pds[2]
    bins=np.append(0,binl)
    n=np.empty((nlon,nlat,len(binl)))
    binno=np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn,thisbin=np.histogram(pdata[ilon,ilat,:],bins) 
            n[ilon,ilat,:]=thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon,ilat,:]=np.digitize(pdata[ilon,ilat,:],bins) 
    #### Calculate the number of days with non-missing data, for normalization
    ndmat=np.tile(np.expand_dims(np.nansum(n,axis=2),axis=2),(1,1,len(bins)-1))
    thisppdfmap=n/ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap=np.empty(thisppdfmap.shape)
    for ibin in range(len(bins)-1):
        testpamtmap[:,:,ibin]=(pdata*(ibin==binno)).sum(axis=2)
    thispamtmap=testpamtmap/ndmat
    return thisppdfmap,thispamtmap
Project: traffic_detection_yolo2    Author: wAuner
def set_responsibilities(anchor_frames, iou_thresh=0.6):
    """
    Changes the IOU values for the anchor frames to binary values

    anchor_frames: list of frames where each frame contains all features for a specific anchor
    iou_thresh: threshold to decide which anchor is responsible
    """
    # set box with maximum IOU to 1
    anchor_frames = [frame.copy() for frame in anchor_frames]
    # find maximum IOU value over all frames
    helper_array = np.array([frame[frame.columns[0]] for frame in anchor_frames]).T
    max_indices = np.argmax(helper_array, axis=1)
    data_idx = np.arange(len(max_indices))
    for obj_idx, frame_idx in zip(data_idx, max_indices):
        temp_frame = anchor_frames[frame_idx]
        temp_frame.loc[obj_idx, temp_frame.columns[0]] = 1

    # applying the iou threshold on a copy of the dataframes
    for frame in anchor_frames:
        frame[frame.columns[0]] = np.digitize(frame[frame.columns[0]], [iou_thresh])

    return anchor_frames
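A side note on the pattern in the last loop above: np.digitize with a single edge acts as a 0/1 threshold. A minimal sketch, with made-up IOU values:

import numpy as np

iou = np.array([0.2, 0.6, 0.61, 0.95])
# Values below the edge map to 0, values at or above it map to 1.
print(np.digitize(iou, [0.6]))    # [0 1 1 1]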
Project: brainiak    Author: brainiak
def _init_classes(self, y):
        """Map all possible classes to the range [0,..,C-1]

        Parameters
        ----------

        y : list of arrays of int, each element has shape=[samples_i,]
            Labels of the samples for each subject


        Returns
        -------
        new_y : list of arrays of int, each element has shape=[samples_i,]
            Mapped labels of the samples for each subject

        Note
        ----
            The mapping of the classes is saved in the attribute classes_.
        """
        self.classes_ = unique_labels(utils.concatenate_not_none(y))
        new_y = [None] * len(y)
        for s in range(len(y)):
            new_y[s] = np.digitize(y[s], self.classes_) - 1
        return new_y
Project: IDNNs    Author: ravidziv
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x,
                              unique_inverse_y, calc_DKL=False):
    bins = bins.astype(np.float32)
    num_of_bins = bins.shape[0]
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0,1, num=num_of_bins))
    # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
Project: tensorpac    Author: EtienneCmb
def _kl_hr(pha, amp, nbins, optimize):
    """Binarize the amplitude according to phase values.

    This function is shared by the Kullback-Leibler Distance and the
    Height Ratio.
    """
    vecbin = np.linspace(-np.pi, np.pi, nbins + 1)
    phad = np.digitize(pha, vecbin) - 1

    abin = []
    for i in np.unique(phad):
        # Find where phase take vecbin values :
        idx = phad == i
        # Take the sum of amplitude inside the bin :
        abin_pha = np.einsum('i...j, k...j->ik...', amp, idx,
                             optimize=optimize)
        abin.append(abin_pha)

    return np.array(abin)
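A tiny sketch of the phase-binning step above in isolation (synthetic phase values, nbins chosen arbitrarily):

import numpy as np

nbins = 4
pha = np.array([-3.0, -1.0, 0.5, 3.0])
vecbin = np.linspace(-np.pi, np.pi, nbins + 1)
# Subtracting 1 turns digitize's edge indices into 0-based bin labels in [0, nbins-1].
phad = np.digitize(pha, vecbin) - 1
print(phad)    # [0 1 2 3]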
Project: nelpy    Author: nelpy
def _compute_ratemap(self, min_duration=None):

        if min_duration is None:
            min_duration = self._min_duration

        ext = self.trans_func(self._extern, at=self._bst.bin_centers)

        ext_bin_idx = np.digitize(ext, self.bins, True)
        # make sure that all the events fit between extmin and extmax:
        # TODO: this might rather be a warning, but it's a pretty serious warning...
        if ext_bin_idx.max() > self.n_bins:
            raise ValueError("ext values greater than 'ext_max'")
        if ext_bin_idx.min() == 0:
            raise ValueError("ext values less than 'ext_min'")

        ratemap = np.zeros((self.n_units, self.n_bins))

        for tt, bidx in enumerate(ext_bin_idx):
            ratemap[:,bidx-1] += self._bst.data[:,tt]

        # apply minimum observation duration
        for uu in range(self.n_units):
            ratemap[uu][self.occupancy*self._bst.ds < min_duration] = 0

        return ratemap / self._bst.ds
Project: yt    Author: yt-project
def __call__(self, data_object):
        orig_shape = data_object[self.x_name].shape
        x_vals = data_object[self.x_name].ravel().astype('float64')
        y_vals = data_object[self.y_name].ravel().astype('float64')

        x_i = (np.digitize(x_vals, self.x_bins) - 1).astype('int32')
        y_i = (np.digitize(y_vals, self.y_bins) - 1).astype('int32')
        if np.any((x_i == -1) | (x_i == len(self.x_bins)-1)) \
            or np.any((y_i == -1) | (y_i == len(self.y_bins)-1)):
            if not self.truncate:
                mylog.error("Sorry, but your values are outside" + \
                            " the table!  Dunno what to do, so dying.")
                mylog.error("Error was in: %s", data_object)
                raise ValueError
            else:
                x_i = np.minimum(np.maximum(x_i,0), len(self.x_bins)-2)
                y_i = np.minimum(np.maximum(y_i,0), len(self.y_bins)-2)

        my_vals = np.zeros(x_vals.shape, dtype='float64')
        lib.BilinearlyInterpolate(self.table,
                                 x_vals, y_vals, self.x_bins, self.y_bins,
                                 x_i, y_i, my_vals)
        my_vals.shape = orig_shape
        return my_vals
Project: gridded    Author: NOAA-ORR-ERD
def interpolation_alphas(self, points, *args, **kwargs):
        '''
        Returns a pair of values. The 1st value is an array of the depth indices of all the particles.
        The 2nd value is an array of the interpolation alphas for the particles between their depth
        index and depth_index+1. If both values are None, then all particles are on the surface layer.
        '''
        points = np.asarray(points, dtype=np.float64)
        points = points.reshape(-1, 3)
        underwater = points[:, 2] > 0
        if len(np.where(underwater)[0]) == 0:
            return None, None
        indices = -np.ones((len(points)), dtype=np.int64)
        alphas = -np.ones((len(points)), dtype=np.float64)
        pts = points[underwater]
        und_ind = -np.ones((len(np.where(underwater)[0])))
        und_alph = und_ind.copy()
        und_ind = np.digitize(pts[:,2], self.depth_levels) - 1
        for i,n in enumerate(und_ind):
            if n == len(self.depth_levels) -1:
                und_ind[i] = -1
            if und_ind[i] != -1:
                und_alph[i] = (pts[i,2] - self.depth_levels[und_ind[i]]) / (self.depth_levels[und_ind[i]+1] - self.depth_levels[und_ind[i]])
        indices[underwater] = und_ind
        alphas[underwater] = und_alph
        return indices, alphas
Project: coremltools    Author: apple
def setUpClass(self):
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier

        # Load data and train model
        import numpy as np
        scikit_data = load_boston()
        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
        t = scikit_data.target
        num_classes = 3
        target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
Project: coremltools    Author: apple
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.ensemble import RandomForestClassifier
        import numpy as np

        scikit_data = load_boston()
        scikit_model = RandomForestClassifier(random_state = 1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.scikit_model = scikit_model
Project: coremltools    Author: apple
def setUpClass(self):
        from sklearn.datasets import load_boston

        # Load data and train model
        import numpy as np
        scikit_data = load_boston()
        num_classes = 3
        self.X = scikit_data.data.astype('f').astype('d') ## scikit-learn downcasts data
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t, bins = num_classes - 1)[1]) - 1

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.feature_names = scikit_data.feature_names
        self.output_name = 'target'
Project: coremltools    Author: apple
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        from sklearn.tree import DecisionTreeClassifier
        from sklearn.preprocessing import MultiLabelBinarizer
        import numpy as np

        scikit_data = load_boston()
        scikit_model = DecisionTreeClassifier(random_state = 1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)

        # Save the data and the model
        self.scikit_data = scikit_data
        self.target = target
        self.scikit_model = scikit_model
Project: coremltools    Author: apple
def setUpClass(self):
        """
        Set up the unit test by loading the dataset and training a model.
        """
        from sklearn.datasets import load_boston
        import numpy as np

        scikit_data = load_boston()
        scikit_model = GradientBoostingClassifier(random_state = 1)
        t = scikit_data.target
        target = np.digitize(t, np.histogram(t)[1]) - 1
        scikit_model.fit(scikit_data.data, target)
        self.target = target

        # Save the data and the model
        self.scikit_data = scikit_data
        self.scikit_model = scikit_model
Project: Thrifty    Author: swkrueger
def auto_classify_transmitters(detections):
    """Identify transmitter IDs based on carrier frequency."""
    # Split by receiver
    detections_by_rx = defaultdict(list)
    for detection in detections:
        detections_by_rx[detection.rxid].append(detection)

    edges = {}
    for rxid, rx_detections in detections_by_rx.iteritems():
        freqs = np.array([d.carrier_info.bin for d in rx_detections])
        rx_edges = detect_transmitter_windows(freqs)

        summary = ("Detected {} transmitter(s) at RX {}:"
                   .format(len(rx_edges) - 1, rxid))
        for i in range(len(rx_edges) - 1):
            summary += " {}-{}".format(rx_edges[i], rx_edges[i+1] - 1)
        print(summary)

        edges[rxid] = rx_edges[:-1]

    txids = [np.digitize(d.carrier_info.bin, edges[d.rxid]) - 1
             for d in detections]

    return txids
Project: pymod    Author: pymodproject
def assign_dope_items(self, selection):
        # Builds a list of all DOPE values of the residues in the selection.
        ldope = []
        for chain_element in selection:
            ldope.extend(chain_element.dope_scores)
        # Takes the min and max values among all the selected residues.
        min_value = min(ldope)
        max_value = max(ldope)
        # An array with the equally spaced limits generated with the list above.
        bins = numpy.array(numpy.linspace(min_value, max_value, num=10))
        for chain_element in selection:
            # An array with all the DOPE values of a single chain in the selection.
            adope = numpy.array(chain_element.dope_scores)
            # An array with the id of the bins where those values reside.
            inds = numpy.digitize(adope, bins)
            # Returns a list like:
            # [(-0.052, 4), (-0.03, 3), (-0.04, 5), (-0.04, 6), (-0.041, 7), (-0.042, 8), (-0.043, 10), ...]
            # which contains, for each standard residue of a polypeptide chain, a tuple. The
            # first value of the tuple is the DOPE score of that residue, the second is the id
            # (going from 1 to 10) of the bin where that value resides.
            chain_element.dope_items = []
            for dope_score, bin_id in zip(adope, inds):# zip(ldope, inds):
                chain_element.dope_items.append({"dope-score":dope_score, "interval": bin_id})
Project: harpreif    Author: harpribot
def __update_state(self):
        """
        Updates the state space (self.gamestate) after the suggested action is taken
        :return: None
        """
        jigsaw_id, place_id = self.decode_action()
        self.__update_placed_pieces(jigsaw_id, place_id)
        if self.state_type == 'hog':
            self.__render_gamestate()
        elif self.state_type == 'image':
            resized_discrete_im = np.digitize(
                            imresize(self.jigsaw_image, (self.state_height, self.state_width)),
                            self.bins)
            self.gamestate = np.array([resized_discrete_im]).transpose().swapaxes(0, 1)

        else:
            raise ValueError('The state type is not valid, enter "hog" or "image"')
Project: hypertools    Author: ContextLab
def vals2colors(vals,cmap='GnBu_d',res=100):
    """Maps values to colors
    Args:
    values (list or list of lists) - list of values to map to colors
    cmap (str) - color map (default is 'husl')
    res (int) - resolution of the color map (default: 100)
    Returns:
    list of rgb tuples
    """
    # flatten if list of lists
    if any(isinstance(el, list) for el in vals):
        vals = list(itertools.chain(*vals))

    # get palette from seaborn
    palette = np.array(sns.color_palette(cmap, res))
    ranks = np.digitize(vals, np.linspace(np.min(vals), np.max(vals)+1, res+1)) - 1
    return [tuple(i) for i in palette[ranks, :]]
Project: ugali    Author: DarkEnergySurvey
def reverseHistogram(data,bins=None):
    """             
    Bins data using numpy.histogram and calculates the
    reverse indices for the entries like IDL.
    Parameters:
    data  : data to pass to numpy.histogram
    bins  : bins to pass to numpy.histogram 
    Returns: 
    hist  : bin content output by numpy.histogram 
    edges : edges output from numpy.histogram 
    rev   : reverse indices of entries in each bin 
    Using Reverse Indices: 
        h,e,rev = histogram(data, bins=bins) 
        for i in range(h.size):  
            if rev[i] != rev[i+1]: 
                # data points were found in this bin, get their indices
                indices = rev[ rev[i]:rev[i+1] ] 
                # do calculations with data[indices] ...  
    """
    if bins is None: bins = numpy.arange(data.max()+2)
    hist, edges = numpy.histogram(data, bins=bins)
    digi = numpy.digitize(data.flat,bins=numpy.unique(data)).argsort()
    rev = numpy.hstack( (len(edges), len(edges) + numpy.cumsum(hist), digi) )
    return hist,edges,rev
Project: WaveNet-Enhancement    Author: auspicious3000
def run_semi_online(self, sess, inputs_clean, inputs_noisy, num_samples):
        dump = sess.run(self.init_ops, 
                        feed_dict={self.history_clean: inputs_clean[:,0:self.len_pad+1]})
        skips_noisy_sum = sess.run(self.skips_noisy_sum, 
                                   feed_dict={self.inputs_noisy: inputs_noisy})
        indices = inputs_clean[:,self.len_pad:self.len_pad+1]
        predictions_ = []
        for step in xrange(num_samples):
            #indices = inputs_clean[:,self.len_pad+step:self.len_pad+1+step]
            feed_dict = feed_dict={self.inputs_clean: indices,
                                   self.skips_noisy: skips_noisy_sum[:,:,step]}
            output_dist = sess.run(self.out_ops, feed_dict=feed_dict)[0]
            #indices = np.argmax(output_dist, axis=1)[:,None]
            #inputs = self.bins_center[indices[:,0]].astype(np.float32)
            inputs = np.matmul(output_dist, self.bins_center).astype(np.float32)
            indices = np.digitize(inputs, self.bins_edge, right=False)[:,None]
            predictions_.append(indices)

        predictions = np.concatenate(predictions_, axis=1)
        dump = sess.run(self.dequ_ops)

        return predictions
Project: WaveNet-Enhancement    Author: auspicious3000
def run_semi_online_v2(sess,
                       out_ops,
                       skips_noisy_batch, 
                       indices, 
                       inputs_noisy, 
                       num_samples):
    skips_noisy_sum = sess.run(skips_noisy_batch)
    predictions_ = []
    for step in xrange(num_samples):
        feed_dict = feed_dict={self.inputs_clean: indices,
                               self.skips_noisy: skips_noisy_sum[:,:,step]}
        output_dist = sess.run([out_ops], feed_dict=feed_dict)[0]
        #output dim = 1 x 256, it is 2D but we need 1D input to argmax
        indices = random_bins(NUM_CLASSES, output_dist)
        inputs = self.bins[indices]
        #inputs = np.array(np.matmul(output_dist,self.bins), dtype=np.float32)[:,None]
        #indices = np.digitize(inputs[:,0], self.bins, right=False)[:,None]
        predictions_.append(inputs)
Project: spyking-circus-ort    Author: spyking-circus
def compute_unnormalized_crosscorrelogram(a, b, nb_bins=101, width=100e-3, f=0.0, **kwargs):
        """Compute the un-normalized cross-correlogram"""

        bin_width = width / float(nb_bins)
        start = - width / 2.0
        stop = + width / 2.0
        bins = np.linspace(start, stop, nb_bins + 1)
        values = np.zeros(nb_bins, dtype=np.int)
        for v in a:
            d = b - v - f * bin_width
            is_selected = np.abs(d) < width / 2.0
            d = d[is_selected]
            indices = np.digitize(d, bins) - 1
            values[indices] += 1
        if 't_min' in kwargs and 't_max' in kwargs:
            t_min, t_max = [kwargs[key] for key in ['t_min', 't_max']]
            if t_min is not None and t_max is not None:
                values = values.astype(np.float) / (t_max - t_min)
        bins = bins * 1e+3
        values = np.append(values, [values[-1]])

        return bins, values
Project: sims_featureScheduler    Author: lsst
def roundx(x, y, binstart=0.1):
    """Round off to try and grid-up nearly gridded data
    """
    bins = np.arange(x.min(), x.max()+binstart, binstart)
    counts, bin_edges = np.histogram(x, bins=bins)

    # merge together bins that are neighboring and have counts
    new_bin_edges = []
    new_bin_edges.append(bin_edges[0])
    for i, b in enumerate(bin_edges[1:]):
        if (counts[i] > 0) & (counts[i-1] > 0):
            pass
        else:
            new_bin_edges.append(bin_edges[i])
    if bin_edges[-1] != new_bin_edges[-1]:
        new_bin_edges.append(bin_edges[-1])
    indx = np.digitize(x, new_bin_edges)
    new_bin_edges = np.array(new_bin_edges)
    bin_centers = (new_bin_edges[1:]-new_bin_edges[:-1])/2. + new_bin_edges[:-1]
    new_x = bin_centers[indx-1]
    return new_x
Project: Benchmarks    Author: ECP-CANDLE
def evaluate_model(model, generator, steps, metric, category_cutoffs=[0.]):
    y_true, y_pred = None, None
    count = 0
    while count < steps:
        x_batch, y_batch = next(generator)
        y_batch_pred = model.predict_on_batch(x_batch)
        y_batch_pred = y_batch_pred.ravel()
        y_true = np.concatenate((y_true, y_batch)) if y_true is not None else y_batch
        y_pred = np.concatenate((y_pred, y_batch_pred)) if y_pred is not None else y_batch_pred
        count += 1

    loss = evaluate_keras_metric(y_true.astype(np.float32), y_pred.astype(np.float32), metric)

    y_true_class = np.digitize(y_true, category_cutoffs)
    y_pred_class = np.digitize(y_pred, category_cutoffs)

    # theano does not like integer input
    acc = evaluate_keras_metric(y_true_class.astype(np.float32), y_pred_class.astype(np.float32), 'binary_accuracy')  # works for multiclass labels as well

    return loss, acc, y_true, y_pred, y_true_class, y_pred_class
Project: neural-slack-bot    Author: juliakreutzer
def put_in_buckets(data_array, labels, buckets, mode='pad'):
    """
    Given bucket edges and data, put the data in buckets according to their length
    :param data_array:
    :param labels:
    :param buckets:
    :return:
    """
    input_lengths = np.array([len(s) for s in data_array], dtype='int')
    input_bucket_index = [i if i<len(buckets) else len(buckets)-1 for i in np.digitize(input_lengths, buckets, right=False)]  # during testing, longer sentences are just truncated
    if mode == 'truncate':
        input_bucket_index -= 1
    bucketed_data = {}
    reordering_indexes = {}
    for bucket in list(np.unique(input_bucket_index)):
        length_indexes = np.where(input_bucket_index == bucket)[0]
        reordering_indexes[bucket] = length_indexes
        maxlen = int(np.floor(buckets[bucket]))
        padded = pad_data(data_array[length_indexes], labels[length_indexes], max_len=maxlen)
        bucketed_data[bucket] = padded  # in final dict, start counting by zero
    return bucketed_data, reordering_indexes
Project: xam    Author: MaxHalford
def transform(self, X, y=None):
        """Binarize X based on the fitted cut points."""

        # scikit-learn checks
        X = check_array(X)

        if self.cut_points is None:
            raise NotFittedError('Estimator not fitted, call `fit` before exploiting the model.')

        if X.shape[1] != len(self.cut_points):
            raise ValueError("Provided array's dimensions do not match with the ones from the "
                             "array `fit` was called on.")

        binned = np.array([
            np.digitize(x, self.cut_points[i])
            if len(self.cut_points[i]) > 0
            else np.zeros(x.shape)
            for i, x in enumerate(X.T)
        ]).T

        return binned
Project: grocsvs    Author: grocsvs
def get_bg_mats(fragsx, fragsy, sv_region, window_size):
    bg_mats = {}
    selectors = {"+":"end_pos", "-":"start_pos"}

    binsx = numpy.arange(sv_region["startx"], sv_region["endx"]+window_size, window_size)
    binsy = numpy.arange(sv_region["starty"], sv_region["endy"]+window_size, window_size)

    for orientationx in "+-":
        binx = numpy.digitize(fragsx[selectors[orientationx]], binsx)-1
        gx = fragsx.groupby(binx)
        bcsx = [set(gx.get_group(k)["bc"]) if k in gx.groups else set() for k in range(len(binsx))]

        for orientationy in "+-":
            biny = numpy.digitize(fragsy[selectors[orientationy]], binsy)-1
            gy = fragsy.groupby(biny)
            bcsy = [set(gy.get_group(k)["bc"]) if k in gy.groups else set() for k in range(len(binsy))]

            bg_mats[orientationx+orientationy] = get_bg_mat(bcsx, bcsy)

    return bg_mats
Project: decoding_challenge_cortana_2016_3rd    Author: kingjr
def _digitize(x, bins, right=False):
    """Replacement for digitize with right kwarg (numpy < 1.7).

    Notes
    -----
    This fix is only meant for integer arrays. If ``right==True`` but either
    ``x`` or ``bins`` are of a different type, a NotImplementedError will be
    raised.
    """
    if right:
        x = np.asarray(x)
        bins = np.asarray(bins)
        if (x.dtype.kind not in 'ui') or (bins.dtype.kind not in 'ui'):
            raise NotImplementedError("Only implemented for integer input")
        return np.digitize(x - 1e-5, bins)
    else:
        return np.digitize(x, bins)
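For reference, the two boundary conventions the wrapper above emulates, shown on a modern numpy (inputs made up for illustration):

import numpy as np

x = np.array([1, 2, 3, 4])
bins = np.array([1, 3])

# right=False (default): bins[i-1] <= x < bins[i]
print(np.digitize(x, bins))                 # [1 1 2 2]
# right=True: bins[i-1] < x <= bins[i]
print(np.digitize(x, bins, right=True))     # [0 1 1 2]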
Project: klusta    Author: kwikteam
def _index_of(arr, lookup):
    """Replace scalars in an array by their indices in a lookup table.

    Implicitly assume that:

    * All elements of arr and lookup are non-negative integers.
    * All elements of arr belong to lookup.

    This is not checked for performance reasons.

    """
    # Equivalent of np.digitize(arr, lookup) - 1, but much faster.
    # TODO: assertions to disable in production for performance reasons.
    # TODO: np.searchsorted(lookup, arr) is faster on small arrays with large
    # values
    lookup = np.asarray(lookup, dtype=np.int32)
    m = (lookup.max() if len(lookup) else 0) + 1
    tmp = np.zeros(m + 1, dtype=np.int)
    # Ensure that -1 values are kept.
    tmp[-1] = -1
    if len(lookup):
        tmp[lookup] = np.arange(len(lookup))
    return tmp[arr]
Project: faampy    Author: ncasuk
def _windbarbs(u, v, press, delta):
    #delta = 2500 # equals 25mb
    p_bin_min = int((np.min(press) // delta) * delta)
    p_bin_max = int(((np.max(press) // delta)+1) * delta)
    p_bins = np.array(range(p_bin_min, p_bin_max, delta))

    ixs = np.digitize(press, p_bins)
    uwind = [np.mean(u[ixs == ix]) for ix in list(set(ixs))]
    vwind = [np.mean(v[ixs == ix]) for ix in list(set(ixs))]

    ax = plt.gca()
    inv = ax.transLimits.inverted()
    #x_pos, _none = inv.transform((0.92, 0))
    x_pos = inv.transform(np.array([[0.92,0]]))[0, 0]
    baraxis = [x_pos] * len(p_bins) 
    plt.barbs(baraxis, p_bins, uwind, vwind, \
              barb_increments=barb_increments, linewidth = .75)#, transform=ax.transAxes)
Project: BDD_Driving_Model    Author: gy20073
def pdf_bins_batch(bins, prob, querys):
    assert (len(bins) == len(prob) + 1)

    querys = np.array(querys)
    bins = np.array(bins)
    idx = np.digitize(querys, bins[1:-1])

    # get the mass
    masses = prob[idx]

    if FLAGS.pdf_normalize_bins:
        # get the x bin length
        xlen = bins[idx + 1] - bins[idx]
        return masses / xlen
    else:
        return masses
Project: mindpark    Author: danijar
def _plot_line(self, ax, domain, line, label, color, marker):
        order = np.argsort(domain)
        domain, line = domain[order], line[order]

        borders = np.linspace(domain[0], domain[-1], self._resolution)
        borders = np.digitize(borders, domain)
        domain = np.linspace(domain[0], domain[-1], len(borders) - 1)
        lower_ = aggregate(line, borders, lambda x: np.percentile(x, 10, 0)[0])
        middle = aggregate(line, borders, lambda x: np.percentile(x, 50, 0)[0])
        upper_ = aggregate(line, borders, lambda x: np.percentile(x, 90, 0)[0])

        ax.fill_between(
            domain, upper_, lower_, facecolor=color, edgecolor=color,
            **self.AREA)
        ax.plot(
            domain, middle, c=color, label=label)
Project: segmentator    Author: ofgulban
def map_ima_to_2D_hist(xinput, yinput, bins_arr):
    """Image to volume histogram mapping (kind of inverse histogram).

    Parameters
    ----------
    xinput : TODO
        First image, which is often the intensity image (eg. T1w).
    yinput : TODO
        Second image, which is often the gradient magnitude image
        derived from the first image.
    bins_arr : TODO
        Array of bins.

    Returns
    -------
    vox2pixMap : TODO
        Voxel to pixel mapping.

    """
    dgtzData = np.digitize(xinput, bins_arr)-1
    dgtzGra = np.digitize(yinput, bins_arr)-1
    nr_bins = len(bins_arr)-1  # subtract 1 (more borders than containers)
    vox2pixMap = sub2ind(nr_bins, dgtzData, dgtzGra)  # 1D
    return vox2pixMap
Project: ytree    Author: brittonsmith
def _node_io_loop(self, func, *args, **kwargs):
        root_nodes = kwargs.pop("root_nodes", None)
        if root_nodes is None:
            root_nodes = self.trees
        opbar = kwargs.pop("pbar", None)

        ai = np.array([node._ai for node in root_nodes])
        dfi = np.digitize(ai, self._ei)
        udfi = np.unique(dfi)

        for i in udfi:
            if opbar is not None:
                kwargs["pbar"] = "%s [%d/%d]" % (opbar, i+1, udfi.size)
            my_nodes = root_nodes[dfi == i]
            kwargs["root_nodes"] = my_nodes
            kwargs["fcache"] = {}

            fn = "%s_%04d%s" % (self._prefix, i, self._suffix)
            f = h5py.File(fn, "r")
            kwargs["f"] = f
            super(YTreeArbor, self)._node_io_loop(func, *args, **kwargs)
            f.close()
Project: radar    Author: amoose136
def test_mem_digitize(self, level=rlevel):
        # Ticket #95
        for i in range(100):
            np.digitize([1, 2, 3, 4], [1, 3])
            np.digitize([0, 1, 2, 3, 4], [1, 3])
Project: introspective    Author: numeristical
def plot_reliability_diagram(y,x,bins=np.linspace(0,1,21),size_points=True, show_baseline=True,ax=None, marker='+',c='red', **kwargs):
    if ax is None:
        ax = _gca()
        fig = ax.get_figure()
    digitized_x = np.digitize(x, bins)
    mean_count_array = np.array([[np.mean(y[digitized_x == i]),len(y[digitized_x == i]),np.mean(x[digitized_x==i])] for i in np.unique(digitized_x)])
    if show_baseline:
        ax.plot(np.linspace(0,1,100),(np.linspace(0,1,100)),'k--')
    for i in range(len(mean_count_array[:,0])):
        if size_points:
            plt.scatter(mean_count_array[i,2],mean_count_array[i,0],s=mean_count_array[i,1],marker=marker,c=c, **kwargs)
        else: 
            plt.scatter(mean_count_array[i,2],mean_count_array[i,0], **kwargs)
    plt.axis([-0.1,1.1,-0.1,1.1])
    return(mean_count_array[:,2],mean_count_array[:,0],mean_count_array[:,1])
Project: Epileptic-Seizure-Prediction    Author: cedricsimar
def group_into_bands(self, fft, fft_freq, nfreq_bands):

        """ 
        Group the fft result by frequency bands and take the mean
        of the fft values within each band

        Return a list of the frequency bands' means (except the first element
        which is the frequency band 0 - 0.1Hz)
        """

        freq_bands = np.digitize(fft_freq, FREQUENCIES)
        df = DataFrame({'fft': fft, 'band': freq_bands})
        df = df.groupby('band').mean()
        return df.fft[1:-1]
Project: MCEvidence    Author: yabebalFantaye
def weighted_thin(weights,thin_unit):
    '''
    Given a weight array, perform thinning.
    If all the weights are equal, this should
    be equivalent to selecting every N/(thinfrac*N)-th sample,
    where N=len(weights).
    '''

    N=len(weights)
    if thin_unit==0: return range(N),weights

    if thin_unit<1:
        N2=np.int(N*thin_unit)
    else:
        N2=N//thin_unit

    #bin the weight index to have the desired length
    #this defines the bin edges
    bins = np.linspace(-1, N, N2+1) 
    #this collects the indices of the weight array in each bin
    ind = np.digitize(np.arange(N), bins)  
    #this gets the maximum weight in each bin
    thin_ix=pd.Series(weights).groupby(ind).idxmax().tolist()
    thin_ix=np.array(thin_ix,dtype=np.intp)
    logger.info('Thinning with weighted binning: thinfrac={}. new_nsamples={},old_nsamples={}'.format(thin_unit,len(thin_ix),len(weights)))

    return {'ix':thin_ix, 'w':weights[thin_ix]}
Project: IDNNs    Author: ravidziv
def calc_information_for_layer(data, bins, unique_inverse_x, unique_inverse_y, pxs, pys1):
    bins = bins.astype(np.float32)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
Project: IDNNs    Author: ravidziv
def calc_all_sigams(data, sigmas):
    batchs = 128
    num_of_bins = 8
    # bins = np.linspace(-1, 1, num_of_bins).astype(np.float32)
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0,1, num=num_of_bins))
    # data = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)

    batch_points = np.rint(np.arange(0, data.shape[0] + 1, batchs)).astype(dtype=np.int32)
    I_XT = []
    num_of_rand = min(800, data.shape[1])
    for sigma in sigmas:
        # print sigma
        I_XT_temp = 0
        for i in range(0, len(batch_points) - 1):
            new_data = data[batch_points[i]:batch_points[i + 1], :]
            rand_indexs = np.random.randint(0, new_data.shape[1], num_of_rand)
            new_data = new_data[:, :]
            N = new_data.shape[0]
            d = new_data.shape[1]
            diff_mat = np.linalg.norm(((new_data[:, np.newaxis, :] - new_data)), axis=2)
            # print diff_mat.shape, new_data.shape
            s0 = 0.2
            # TODO - add leave-one-out validation
            res = minimize(optimiaze_func, s0, args=(diff_mat, d, N), method='nelder-mead',
                           options={'xtol': 1e-8, 'disp': False, 'maxiter': 6})
            eta = res.x
            diff_mat0 = - 0.5 * (diff_mat / (sigma ** 2 + eta ** 2))
            diff_mat1 = np.sum(np.exp(diff_mat0), axis=0)
            diff_mat2 = -(1.0 / N) * np.sum(np.log2((1.0 / N) * diff_mat1))
            I_XT_temp += diff_mat2 - d * np.log2((sigma ** 2) / (eta ** 2 + sigma ** 2))
            # print diff_mat2 - d*np.log2((sigma**2)/(eta**2+sigma**2))
        I_XT_temp /= len(batch_points)
        I_XT.append(I_XT_temp)
    sys.stdout.flush()
    return I_XT
Project: nelpy    Author: nelpy
def _compute_ratemap(self, min_duration=None):
        """

        min_duration is the min duration in seconds for a bin to be
        considered 'valid'; if too few observations were made, then the
        firing rate is kept at an estimate of 0. If min_duration == 0,
        then all the spikes are used.
        """

        if min_duration is None:
            min_duration = self._min_duration

        x, y = self.trans_func(self._extern, at=self._bst.bin_centers)

        ext_bin_idx_x = np.digitize(x, self.xbins, True)
        ext_bin_idx_y = np.digitize(y, self.ybins, True)

        # make sure that all the events fit between extmin and extmax:
        # TODO: this might rather be a warning, but it's a pretty serious warning...
        if ext_bin_idx_x.max() > self.n_xbins:
            raise ValueError("ext values greater than 'ext_xmax'")
        if ext_bin_idx_x.min() == 0:
            raise ValueError("ext values less than 'ext_xmin'")
        if ext_bin_idx_y.max() > self.n_ybins:
            raise ValueError("ext values greater than 'ext_ymax'")
        if ext_bin_idx_y.min() == 0:
            raise ValueError("ext values less than 'ext_ymin'")

        ratemap = np.zeros((self.n_units, self.n_xbins, self.n_ybins))

        for tt, (bidxx, bidxy) in enumerate(zip(ext_bin_idx_x, ext_bin_idx_y)):
            ratemap[:,bidxx-1, bidxy-1] += self._bst.data[:,tt]

        # apply minimum observation duration
        for uu in range(self.n_units):
            ratemap[uu][self.occupancy*self._bst.ds < min_duration] = 0

        return ratemap / self._bst.ds
Project: legends-of-erukar    Author: etkirsch
def create_one(self):
        type_to_create = self.values[numpy.digitize(numpy.random.uniform(0, 1), self.bins)]
        return self.create_type(type_to_create)
Project: legends-of-erukar    Author: etkirsch
def get_from_custom_distribution(random_value, bins, values):
        return values[np.digitize(random_value, bins)]
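The two snippets above sample from a discrete distribution by digitizing a uniform draw against cumulative bin edges. A self-contained sketch of that pattern (the outcome names and probabilities are hypothetical):

import numpy as np

values = np.array(['common', 'rare', 'epic'])
probs = np.array([0.7, 0.25, 0.05])

# Cumulative edges: a uniform draw lands in bin i with probability probs[i].
bins = np.cumsum(probs)[:-1]    # [0.7, 0.95]

draws = np.random.uniform(0, 1, size=10)
picked = values[np.digitize(draws, bins)]
print(picked)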
Project: PySAT    Author: USGS-Astrogeology
def spl_interp(xa, ya, y2a, x):
    n = xa.size

    #    valloc=baseline_code.value_locate.value_locate(xa, x)
    valloc = numpy.digitize(x,
                            xa) - 1  # The numpy routine digitize appears to basically do what value_locate does in IDL
    klo = []
    for i in valloc:
        klo.append(min(max(i, 0), (n - 2)))
    klo = numpy.array(klo)
    khi = klo + 1
    #
    # KLO and KHI now bracket the input value of X
    #

    if min(xa[khi] - xa[klo]) == 0: print('SPLINT - XA inputs must be distinct')
    #
    # Cubic spline polynomial is now evaluated
    #
    h = xa[khi] - xa[klo]

    a = (xa[khi] - x) / h
    b = (x - xa[klo]) / h
    output = a * ya[klo] + b * ya[khi] + ((a ** 3. - a) * y2a[klo] + (b ** 3. - b) * y2a[khi]) * (h ** 2.) / 6.
    return output

# spl_interp.pro
Project: krpcScripts    Author: jwvanderbeck
def test_mem_digitize(self, level=rlevel):
        # Ticket #95
        for i in range(100):
            np.digitize([1, 2, 3, 4], [1, 3])
            np.digitize([0, 1, 2, 3, 4], [1, 3])
Project: yt    Author: yt-project
def map_to_colors(buff, cmap_name):
    try:
        lut = cmd.color_map_luts[cmap_name]
    except KeyError:
        try:
            # if cmap is tuple, then we're using palettable or brewer2mpl cmaps
            if isinstance(cmap_name, tuple):
                cmap = get_brewer_cmap(cmap_name)
            else:
                cmap = mcm.get_cmap(cmap_name)
            cmap(0.0)
            lut = cmap._lut.T
        except ValueError:
            raise KeyError(
                "Your color map (%s) was not found in either the extracted"
                " colormap file or matplotlib colormaps" % cmap_name)

    if isinstance(cmap_name, tuple):
        # If we are using the colorbrewer maps, don't interpolate
        shape = buff.shape
        # We add float_eps so that digitize doesn't go out of bounds
        x = np.mgrid[0.0:1.0+np.finfo(np.float32).eps:lut[0].shape[0]*1j]
        inds = np.digitize(buff.ravel(), x)
        inds.shape = (shape[0], shape[1])
        mapped = np.dstack([(v[inds]*255).astype('uint8') for v in lut])
        del inds
    else:
        x = np.mgrid[0.0:1.0:lut[0].shape[0]*1j]
        mapped = np.dstack(
                [(np.interp(buff, x, v)*255).astype('uint8') for v in lut ])
    return mapped.copy("C")
Project: yt    Author: yt-project
def _calculate_file_offset_map(self):
        # After the FOF is performed, a load-balancing step redistributes halos
        # and then writes more fields.  Here, for each file, we create a list of
        # files which contain the rest of the redistributed particles.
        ifof = np.array([data_file.total_particles["Group"]
                         for data_file in self.data_files])
        isub = np.array([data_file.total_offset
                         for data_file in self.data_files])
        subend = isub.cumsum()
        fofend = ifof.cumsum()
        istart = np.digitize(fofend - ifof, subend - isub) - 1
        iend = np.clip(np.digitize(fofend, subend), 0, ifof.size - 2)
        for i, data_file in enumerate(self.data_files):
            data_file.offset_files = self.data_files[istart[i]: iend[i] + 1]