Python numpy module: histogram() examples

The following code examples, extracted from open-source Python projects, illustrate how numpy.histogram() is used in practice.

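As a quick reference before the project samples: np.histogram returns a pair of arrays, the per-bin counts and the bin edges (one more edge than counts). A minimal sketch:

import numpy as np

data = np.random.normal(loc=0.0, scale=1.0, size=1000)
counts, bin_edges = np.histogram(data, bins=20)           # 20 equal-width bins over the data's range
assert counts.sum() == data.size                          # with the default range, every sample is binned
density, _ = np.histogram(data, bins=20, density=True)    # normalized so the total area is 1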
Project: Homology_BG    Author: jyotikab
def mypsd(Rates, time_range, bin_w=5., nmax=4000):

    bins = np.arange(0, len(time_range), 1)
    # print(bins)
    a, b = np.histogram(Rates, bins)
    ff = (1. / len(bins)) * abs(np.fft.fft(Rates - np.mean(Rates)))**2
    Fs = 1. / (1 * 0.001)
    freq2 = np.fft.fftfreq(len(bins))[0:len(bins) // 2 + 1]  # d = dt; len(bins)//2 fixes the original len(bins/2)
    freq = np.fft.fftfreq(len(bins))[:len(ff) // 2 + 1]      # integer division needed for slicing in Python 3
    px = ff[0:len(ff) // 2 + 1]
    max_px = np.max(px[1:])
    idx = px == max_px
    corr_freq = freq[np.flatnonzero(idx)]  # np.flatnonzero replaces pylab's removed pl.find
    new_px = px
    max_pow = new_px[np.flatnonzero(idx)]
    return new_px, freq, corr_freq[0], freq2, max_pow
Project: pybot    Author: spillai
def get_histogram(self, data): 
        """
        Project the descriptions on to the codebook/vocabulary, 
        returning the histogram of words
        [N x 1] => [1 x K] histogram
        """
        if self.method == 'vq' or self.method == 'bow': 
            code = self.get_code(data)
            code_hist = self.bow(data, code, self.K)
        elif self.method == 'vlad': 
            code = self.get_code(data)
            code_hist = self.vlad(data, code)
        elif self.method == 'fisher': 
            code = self.get_code(data)
            code_hist = self.fisher(data, code)
        else: 
            raise NotImplementedError('''Histogram method %s not implemented. '''
                                      '''Use vq/bow or vlad or fisher!''' % self.method)            
        return code_hist
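The vq/bow branch above reduces each descriptor to its nearest codeword and counts occurrences. self.bow is not shown in this snippet; a minimal stand-in built on np.bincount (shapes and names are assumptions) looks like:

import numpy as np

def bow_histogram(code, K):
    """Count codeword occurrences into a fixed-length [1 x K] histogram."""
    hist = np.bincount(code, minlength=K).astype(np.float64)
    return hist / max(hist.sum(), 1.0)  # guard against an empty code array

code = np.array([0, 3, 3, 1])           # nearest-codeword index per descriptor
print(bow_histogram(code, K=5))         # [0.25 0.25 0.   0.5  0.  ]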
Project: tensorboard    Author: dmlc
def histogram(name, values, bins, collections=None):
    # pylint: disable=line-too-long
    """Outputs a `Summary` protocol buffer with a histogram.
    The generated
    [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto)
    has one summary value containing a histogram for `values`.
    This op reports an `InvalidArgument` error if any value is not finite.
    Args:
      name: A name for the generated node. Will also serve as a series name in
        TensorBoard.
      values: A real numeric `Tensor`. Any shape. Values to use to
        build the histogram.
      bins: Bin specification forwarded to `make_histogram` when building
        the histogram from `values`.
      collections: Optional list of graph collections keys. The new summary op is
        added to these collections. Defaults to `[GraphKeys.SUMMARIES]`.
    Returns:
      A scalar `Tensor` of type `string`. The serialized `Summary` protocol
      buffer.
    """
    name = _clean_tag(name)
    values = makenp(values)
    hist = make_histogram(values.astype(float), bins)
    return Summary(value=[Summary.Value(tag=name, histo=hist)])
Project: NeoAnalysis    Author: neoanalysis
def modeFilter(data, window=500, step=None, bins=None):
    """Filter based on histogram-based mode function"""
    d1 = data.view(np.ndarray)
    vals = []
    l2 = int(window/2.)
    if step is None:
        step = l2
    i = 0
    while True:
        if i > len(data)-step:
            break
        vals.append(mode(d1[i:i+window], bins))
        i += step

    chunks = [np.linspace(vals[0], vals[0], l2)]
    for i in range(len(vals)-1):
        chunks.append(np.linspace(vals[i], vals[i+1], step))
    remain = len(data) - step*(len(vals)-1) - l2
    chunks.append(np.linspace(vals[-1], vals[-1], remain))
    d2 = np.hstack(chunks)

    if (hasattr(data, 'implements') and data.implements('MetaArray')):
        return MetaArray(d2, info=data.infoCopy())
    return d2
Project: rain-metrics-python    Author: apendergrass
def makedists(pdata,binl):
    ##### This is called from within makeraindist.
    ##### Calculate distributions
    pds = pdata.shape
    nlat = pds[1]
    nlon = pds[0]
    nd = pds[2]
    bins=np.append(0,binl)
    n=np.empty((nlon,nlat,len(binl)))
    binno=np.empty(pdata.shape)
    for ilon in range(nlon):
        for ilat in range(nlat):
            # this is the histogram - we'll get frequency from this
            thisn,thisbin=np.histogram(pdata[ilon,ilat,:],bins) 
            n[ilon,ilat,:]=thisn
            # these are the bin locations. we'll use these for the amount dist
            binno[ilon,ilat,:]=np.digitize(pdata[ilon,ilat,:],bins) 
    #### Calculate the number of days with non-missing data, for normalization
    ndmat=np.tile(np.expand_dims(np.nansum(n,axis=2),axis=2),(1,1,len(bins)-1))
    thisppdfmap=n/ndmat
    #### Iterate back over the bins and add up all the precip - this will be the rain amount distribution
    testpamtmap=np.empty(thisppdfmap.shape)
    for ibin in range(len(bins)-1):
        testpamtmap[:,:,ibin]=(pdata*(ibin==binno)).sum(axis=2)
    thispamtmap=testpamtmap/ndmat
    return thisppdfmap,thispamtmap
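The pairing used above is a common idiom: np.histogram yields per-bin counts while np.digitize yields, per sample, the 1-based index of the bin it fell into, so both stay consistent for the same edges. A quick check:

import numpy as np

x = np.array([0.2, 1.5, 2.5, 2.7])
edges = np.array([0.0, 1.0, 2.0, 3.0])
counts, _ = np.histogram(x, edges)  # array([1, 1, 2])
binno = np.digitize(x, edges)       # array([1, 2, 3, 3]); bin i spans edges[i-1]..edges[i]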
Project: pyspark_dist_explore    Author: Bergvca
def add_column(self, table):
        """Add single column DataFrame to the histogram object.

        If multiple columns share the same name, a (n) will be appended to the name, where n is
        the next available number.

        Args:
            :table: (:obj:`dataframe`)
                A PySpark DataFrame with a single column

        """
        if len(table.columns) > 1:
            raise ValueError('More than one column is being added, use add_data() to add multi-column DataFrames')

        column_name = table.columns[0]

        if not isinstance(table.schema.fields[0].dataType, NumericType):
            raise ValueError('Column %s has a non-numeric type (%s), only numeric types are supported'
                             % (column_name, str(table.schema.fields[0].dataType)))

        self.col_list.append((table, column_name))
Project: pyspark_dist_explore    Author: Bergvca
def to_pandas(self, kind='hist'):
        """Returns a pandas dataframe from the Histogram object.

        This function calculates the Histogram function in Spark if it was not done yet.

        Args:
            :kind: (:obj:`str`, optional):
                'hist' or 'density'. When using hist this returns the histogram object
                as pandas dataframe. When using density the index contains the bin centers, and the values in the
                DataFrame are the scaled values. Defaults to 'hist'

        Returns:
            A pandas DataFrame from the Histogram object.
        """
        self.build()
        if kind == 'hist':
            return pd.DataFrame(self.hist_dict).set_index([self._get_col_names()])
        elif kind == 'density':
            result = pd.DataFrame(self.hist_dict).set_index([self._get_bin_centers()])
            return result.apply(lambda x: x / x.max(), axis=0)
Project: pyspark_dist_explore    Author: Bergvca
def add_data(self, data):
        """Ads 1 or more columns to a histogram.

        Multiple options are available:
            * Add a single column dataframe
            * Add a list of single column dataframes
            * Add a dataframe with multiple columns

        Args:
            :data:
                A single column Spark dataframe, a list of single column Spark
                dataframes, or a multi column Spark dataframe.
        """
        if isinstance(data, list):
            for df_column in data:
                self.add_column(df_column)

        elif len(data.columns) > 1:
            for col_name in data.columns:
                self.add_column(data.select(col_name))

        else:
            self.add_column(data)
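Tying add_column, add_data and to_pandas together, a usage sketch (the import path and a live SparkSession named spark are assumptions, not taken from the snippets above):

from pyspark_dist_explore import Histogram  # assumed import path

df = spark.createDataFrame([(float(i),) for i in range(100)], ['x'])
h = Histogram()               # default binning assumed
h.add_data(df)                # a single-column DataFrame, as documented above
print(h.to_pandas(kind='hist'))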
Project: DVH    Author: glucee
def calculate_plane_histogram(plane, doseplane, dosegridpoints,
                              maxdose, dd, id, structure, hist):
    """Calculate the DVH for the given plane in the structure."""
    contours = [[x[0:2] for x in c['data']] for c in plane]

    # If there is no dose for the current plane, go to the next plane
    if not len(doseplane):
        return (np.arange(0, maxdose), 0)

    # Create a zero valued bool grid
    grid = np.zeros((dd['rows'], dd['columns']), dtype=np.uint8)

    # Calculate the histogram for each contour in the plane
    # and boolean xor to remove holes
    for i, contour in enumerate(contours):
        m = get_contour_mask(dd, id, dosegridpoints, contour)
        grid = np.logical_xor(m.astype(np.uint8), grid).astype(bool)  # np.bool was removed in NumPy 1.24

    hist, vol = calculate_contour_dvh(
        grid, doseplane, maxdose, dd, id, structure)
    return (hist, vol)
Project: DVH    Author: glucee
def calculate_contour_dvh(mask, doseplane, maxdose, dd, id, structure):
    """Calculate the differential DVH for the given contour and dose plane."""
    # Multiply the structure mask by the dose plane to get the dose mask
    mask = ma.array(doseplane * dd['dosegridscaling'] * 100, mask=~mask)
    # Calculate the differential dvh
    hist, edges = np.histogram(mask.compressed(),
                               bins=maxdose,
                               range=(0, maxdose))

    # Calculate the volume for the contour for the given dose plane
    vol = sum(hist) * ((id['pixelspacing'][0]) *
                       (id['pixelspacing'][1]) *
                       (structure['thickness']))
    return hist, vol

# ========================== Test DVH Calculation =========================== #
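The histogram returned by calculate_contour_dvh is a differential DVH; clinically it is usually reported as a cumulative DVH, the volume receiving at least each dose level. A minimal sketch of that conversion (not part of the project code above):

import numpy as np

def differential_to_cumulative(hist):
    """Volume receiving >= each dose level: reversed cumulative sum of the differential DVH."""
    return np.cumsum(hist[::-1])[::-1]

diff_dvh = np.array([0, 2, 5, 3, 1])
print(differential_to_cumulative(diff_dvh))  # [11 11  9  4  1]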
Project: cg    Author: michaelhabeck
def rdf(coords, bins=100, r_max=None):
    """
    Radial distribution function

    Parameters
    ----------

    coords :
      list of coordinate arrays

    bins : int or numpy array
      distance bins

    r_max : positive float or None
      maximum distance
    """
    if np.ndim(coords) == 2: coords = [coords]

    d = np.sqrt(np.concatenate([calc_distances(c) for c in coords], 0))  # list comprehension: np.concatenate rejects the iterator map() returns in Python 3
    if r_max is not None: d = d[d<r_max]

    g, bins = np.histogram(d, bins=bins)
    r = 0.5 * (bins[1:]+bins[:-1])

    return r, g/r**2
Project: Eskapade    Author: KaveIO
def get_hist_val(self, var_value):
        """Get bin count for bin by value of histogram variable

        :param var_value: a specific value to find corresponding bin.
        :returns: bin counter value
        :rtype: int
        """

        try:
            bin_label = self.value_to_bin_label(var_value)
        except Exception as exc:
            self.log().error(
                'bin label for variable value "%s" not found (%s)',
                str(var_value),
                str(exc))  # Exception.message does not exist in Python 3
            return 0
        return self.get_bin_count(bin_label)
Project: Eskapade    Author: KaveIO
def to_normalized(self, **kwargs):
        """Return a normalized copy of this histogram

        :param str variable: assign new variable name
        :param list variable_range: variable range used for finding the right bins to get values from.
        :param bool combine_values: if bin_specs is not set, combine existing bin labels with variable range.
        """

        # convert to normalized histogram
        new_var_name = str(kwargs.pop('variable', self.variable))
        bin_vals = self.get_bin_vals(**kwargs)
        values = np.float64(bin_vals[0]) / bin_vals[0].sum()
        # When values is a numpy array of 1 element np.float64() returns a 0-dimensional array. See
        # https://github.com/numpy/numpy/issues/3161. The following
        # if-statement is a workaround for this issue.
        if not values.shape:
            values = values.reshape((1,))
        return Histogram(counts=(values, bin_vals[1]), variable=new_var_name)
Project: Eskapade    Author: KaveIO
def _from_numpy(self, counts, bin_edges):
        """Create Histogram from NumPy-style histogram

        :param array counts: numpy histogram counts array
        :param array bin_edges: bin edges
        """

        # initialize from NumPy-style histogram
        _check_num_vals(counts)
        if len(counts) == len(bin_edges) - 1:
            # interpret specified variable values as bin edges
            del self._bin_specs
            self.bin_specs = {'bin_edges': list(bin_edges)}
            bin_edges = list(range(len(counts)))
        elif len(counts) != len(bin_edges):
            # cannot interpret specified variable values as bin values
            self.log().critical('numbers of specified variable values (%d) and value counts (%d) do not match',
                                len(bin_edges), len(counts))
            raise AssertionError('specified variable values and value counts do not match')
        self._val_counts = ValueCounts((self.variable,), (self.variable,),
                                       dict(((v,), c) for c, v in zip(counts, bin_edges)))
Project: Eskapade    Author: KaveIO
def to_root_hist(histogram, **kwargs):
    """Convert Eskapade histogram to root histogram

    Input Eskapade histogram first gets converted to a numpy histogram,
    which is then converted to a root histogram.  All kwargs besides the
    input histograms are passed on to histogram.get_bin_vals(), which makes
    the numpy histogram.

    :param histogram: input Eskapade histogram
    :returns: root histogram
    :rtype: ROOT.TH1
    """
    if not isinstance(histogram, Histogram):
        raise TypeError('histogram not of type %s' % Histogram)
    # convert to ROOT histogram
    new_var_name = str(kwargs.pop('variable', histogram.variable))
    return bin_vals_to_hist(histogram.get_bin_vals(**kwargs), var_name=new_var_name)
Project: Eskapade    Author: KaveIO
def hist_to_bin_vals(hist):
    """Convert root histogram to numpy bin_vals

    Create bin_counts and bin_edges lists, similar to np.histogram()
    function.

    :param ROOT.TH1 hist: input root histogram, assumed to be 1-dimensional.
    :returns: two lists: bin_entries, bin_edges
    """

    # check input type
    assert isinstance(hist, ROOT.TH1), 'input histogram must be a ROOT.TH1'

    # create bin_counts and bin_edges lists, similar to np.histogram() function
    bin_entries = []
    bin_edges = []
    n_bins = hist.GetNbinsX()
    for i in range(n_bins):
        bin_entries.append(hist.GetBinContent(i + 1))
        bin_edges.append(hist.GetBinLowEdge(i + 1))
    bin_edges.append(hist.GetBinLowEdge(n_bins + 1))

    return bin_entries, bin_edges
Project: wikilinks    Author: trovdimi
def plot_entropy_distribution():
    fig = plt.figure()
    ax = fig.add_subplot(111)

    entropy = read_pickle('output/normalized_entropy.obj')

    hist, bin_edges = np.histogram(entropy, bins=10000)
    print(hist, bin_edges)

    #ax.set_yscale('log')
    #ax.set_xscale('log')
    ax.plot(bin_edges[:-1], hist, marker='o', markersize=3, markeredgecolor='none', color='#D65F5F')

    #ax.set_ylim([10**0, 10**6])
    #ax.set_xlim([10**0, 10**6])
    ax.set_xlabel('Entropy')
    ax.set_ylabel('Frequency')

    fig.tight_layout()
    fig.savefig( 'output/normalized_entropy_distribution.pdf', bbox_inches='tight')
Project: radar    Author: amoose136
def test_outliers(self):
        # Check that outliers are not tallied
        a = np.arange(10) + .5

        # Lower outliers
        h, b = histogram(a, range=[0, 9])
        assert_equal(h.sum(), 9)

        # Upper outliers
        h, b = histogram(a, range=[1, 10])
        assert_equal(h.sum(), 9)

        # Normalization ('normed' was removed in NumPy 1.24; density is its replacement)
        h, b = histogram(a, range=[1, 9], density=True)
        assert_almost_equal((h * diff(b)).sum(), 1, decimal=15)

        # Weights
        w = np.arange(10) + .5
        h, b = histogram(a, range=[1, 9], weights=w, density=True)
        assert_equal((h * diff(b)).sum(), 1)

        h, b = histogram(a, bins=8, range=[1, 9], weights=w)
        assert_equal(h, w[1:-1])
Project: radar    Author: amoose136
def test_simple(self):
        """
        Straightforward testing with a mixture of linspace data (for
        consistency). All test values have been precomputed and the values
        shouldn't change
        """
        # Some basic sanity checking, with some fixed data.
        # Checking for the correct number of bins
        basic_test = {50:   {'fd': 4,  'scott': 4,  'rice': 8,  'sturges': 7, 
                             'doane': 8, 'sqrt': 8, 'auto': 7},
                      500:  {'fd': 8,  'scott': 8,  'rice': 16, 'sturges': 10,
                             'doane': 12, 'sqrt': 23, 'auto': 10},
                      5000: {'fd': 17, 'scott': 17, 'rice': 35, 'sturges': 14,
                             'doane': 17, 'sqrt': 71, 'auto': 17}}

        for testlen, expectedResults in basic_test.items():
            # Create some sort of non uniform data to test with
            # (2 peak uniform mixture)
            x1 = np.linspace(-10, -1, testlen // 5 * 2)
            x2 = np.linspace(1, 10, testlen // 5 * 3)
            x = np.concatenate((x1, x2))
            for estimator, numbins in expectedResults.items():
                a, b = np.histogram(x, estimator)
                assert_equal(len(a), numbins, err_msg="For the {0} estimator "
                             "with datasize of {1}".format(estimator, testlen))
Project: radar    Author: amoose136
def test_small(self):
        """
        Smaller datasets have the potential to cause issues with the data
        adaptive methods, especially the FD method. All bin numbers have been
        precalculated.
        """
        small_dat = {1: {'fd': 1, 'scott': 1, 'rice': 1, 'sturges': 1,
                         'doane': 1, 'sqrt': 1},
                     2: {'fd': 2, 'scott': 1, 'rice': 3, 'sturges': 2,
                         'doane': 1, 'sqrt': 2},
                     3: {'fd': 2, 'scott': 2, 'rice': 3, 'sturges': 3,
                         'doane': 3, 'sqrt': 2}}

        for testlen, expectedResults in small_dat.items():
            testdat = np.arange(testlen)
            for estimator, expbins in expectedResults.items():
                a, b = np.histogram(testdat, estimator)
                assert_equal(len(a), expbins, err_msg="For the {0} estimator "
                             "with datasize of {1}".format(estimator, testlen))
Project: radar    Author: amoose136
def test_outlier(self):
        """
        Check the FD, Scott and Doane with outliers.

        The FD estimates a smaller binwidth since it's less affected by
        outliers. Since the range is so (artificially) large, this means more
        bins, most of which will be empty, but the data of interest usually is
        unaffected. The Scott estimator is more affected and returns fewer bins,
        despite most of the variance being in one area of the data. The Doane
        estimator lies somewhere between the other two.
        """
        xcenter = np.linspace(-10, 10, 50)
        outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))

        outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}

        for estimator, numbins in outlier_resultdict.items():
            a, b = np.histogram(outlier_dataset, estimator)
            assert_equal(len(a), numbins)
Project: radar    Author: amoose136
def _hist_bin_sqrt(x):
    """
    Square root histogram bin estimator.

    Bin width is inversely proportional to the data size. Used by many
    programs for its simplicity.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / np.sqrt(x.size)
Project: radar    Author: amoose136
def _hist_bin_sturges(x):
    """
    Sturges histogram bin estimator.

    A very simplistic estimator based on the assumption of normality of
    the data. This estimator has poor performance for non-normal data,
    which becomes especially obvious for large data sets. The estimate
    depends only on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / (np.log2(x.size) + 1.0)
Project: radar    Author: amoose136
def _hist_bin_rice(x):
    """
    Rice histogram bin estimator.

    Another simple estimator with no normality assumption. It has better
    performance for large data than Sturges, but tends to overestimate
    the number of bins. The number of bins is proportional to the cube
    root of data size (asymptotically optimal). The estimate depends
    only on size of the data.

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return x.ptp() / (2.0 * x.size ** (1.0 / 3))
Project: radar    Author: amoose136
def _hist_bin_scott(x):
    """
    Scott histogram bin estimator.

    The binwidth is proportional to the standard deviation of the data
    and inversely proportional to the cube root of data size
    (asymptotically optimal).

    Parameters
    ----------
    x : array_like
        Input data that is to be histogrammed, trimmed to range. May not
        be empty.

    Returns
    -------
    h : An estimate of the optimal bin width for the given data.
    """
    return (24.0 * np.pi**0.5 / x.size)**(1.0 / 3.0) * np.std(x)
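These _hist_bin_* helpers back NumPy's string-valued bins argument, so in user code they are rarely called directly; passing the estimator name to np.histogram is enough:

import numpy as np

x = np.concatenate((np.linspace(-10, -1, 200), np.linspace(1, 10, 300)))
for estimator in ('sqrt', 'sturges', 'rice', 'scott', 'fd', 'auto'):
    counts, edges = np.histogram(x, bins=estimator)
    print(estimator, len(counts))  # number of bins each rule chooses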
Project: IDNNs    Author: ravidziv
def calc_information_sampling(data, bins, pys1, pxs, label, b, b1, len_unique_a, p_YgX, unique_inverse_x,
                              unique_inverse_y, calc_DKL=False):
    bins = bins.astype(np.float32)
    num_of_bins = bins.shape[0]
    # bins = stats.mstats.mquantiles(np.squeeze(data.reshape(1, -1)), np.linspace(0,1, num=num_of_bins))
    # hist, bin_edges = np.histogram(np.squeeze(data.reshape(1, -1)), normed=True)
    digitized = bins[np.digitize(np.squeeze(data.reshape(1, -1)), bins) - 1].reshape(len(data), -1)
    b2 = np.ascontiguousarray(digitized).view(
        np.dtype((np.void, digitized.dtype.itemsize * digitized.shape[1])))
    unique_array, unique_inverse_t, unique_counts = \
        np.unique(b2, return_index=False, return_inverse=True, return_counts=True)
    p_ts = unique_counts / float(sum(unique_counts))
    PXs, PYs = np.asarray(pxs).T, np.asarray(pys1).T
    if calc_DKL:
        pxy_given_T = np.array(
            [calc_probs(i, unique_inverse_t, label, b, b1, len_unique_a) for i in range(0, len(unique_array))]
        )
        p_XgT = np.vstack(pxy_given_T[:, 0])
        p_YgT = pxy_given_T[:, 1]
        p_YgT = np.vstack(p_YgT).T
        DKL_YgX_YgT = np.sum([inf_ut.KL(c_p_YgX, p_YgT.T) for c_p_YgX in p_YgX.T], axis=0)
        H_Xgt = np.nansum(p_XgT * np.log2(p_XgT), axis=1)
    local_IXT, local_ITY = calc_information_from_mat(PXs, PYs, p_ts, digitized, unique_inverse_x, unique_inverse_y,
                                                     unique_array)
    return local_IXT, local_ITY
Project: nanopores    Author: mitschabaude
def fit_koff(nmax=523, NN=4e8, **params):
    tbind = params.pop("tbind")
    params["kd"] = 1e9/tbind
    dx = params.pop("dx")
    rw = randomwalk.get_rw(NAME, params, setup=setup_rw, calc=True)
    rw.domains[1].dx = dx
    times = draw_empirically(rw, N=NN, nmax=nmax, success=False)
    bins = np.logspace(np.log10(min(times)), np.log10(max(times)), 35)
    #bins = np.logspace(-3., 2., 35)
    hist, _ = np.histogram(times, bins=bins)
    cfd = np.cumsum(hist)/float(np.sum(hist))
    t = 0.5*(bins[:-1] + bins[1:])
    tmean = times.mean()
    toff = NLS(t, cfd, t0=tmean)
    koff = 1./toff
    return dict(t=t, cfd=cfd, toff=toff, tmean=tmean, koff=koff)

##### run rw in collect mode and draw bindings from empirical distributions
Project: PCL-ROS-cluster-Segmentation    Author: jupidity
def compute_normal_histograms(normal_cloud):
    norm_x_vals = []
    norm_y_vals = []
    norm_z_vals = []
    numBins = 64

    for norm_component in pc2.read_points(normal_cloud,
                                          field_names = ('normal_x', 'normal_y', 'normal_z'),
                                          skip_nans=True):
        norm_x_vals.append(norm_component[0])
        norm_y_vals.append(norm_component[1])
        norm_z_vals.append(norm_component[2])

    # Compute histograms for the normals in the point cloud.
    # Unit normal components lie in [-1, 1]; the original (0, 256) range would
    # collapse nearly all values into a single bin.
    norm1_hist = np.histogram(norm_x_vals, bins=numBins, range=(-1., 1.))
    norm2_hist = np.histogram(norm_y_vals, bins=numBins, range=(-1., 1.))
    norm3_hist = np.histogram(norm_z_vals, bins=numBins, range=(-1., 1.))


    # Concatenate and normalize the histograms
    norm_hist_features = np.concatenate((norm1_hist[0],norm2_hist[0], norm3_hist[0])).astype(np.float64)
    norm_features = norm_hist_features / np.sum(norm_hist_features)
    return norm_features
Project: fexum    Author: KDD-OpenSource
def build_histogram(feature_id, bins=50):
    feature = Feature.objects.get(pk=feature_id)

    if feature.is_categorical:
        bins = len(feature.categories)

    # Only read column with that name
    dataframe = _get_dataframe(feature.dataset.id)

    bin_set = []
    bins, bin_edges = np.histogram(dataframe[feature.name], bins=bins)
    for bin_index, bin_value in enumerate(bins):
        from_value = bin_edges[bin_index]
        to_value = bin_edges[bin_index + 1]
        bin = Bin(
            feature=feature,
            from_value=from_value,
            to_value=to_value,
            count=bin_value
        )
        bin_set.append(bin)
    Bin.objects.bulk_create(bin_set)

    del bins, bin_edges, bin_set
Project: dicompyler-core    Author: dicompyler
def from_data(cls, data, binsize=1):
        """Initialization for a DVH from raw data.

        Parameters
        ----------
        data : iterable or numpy array
            An iterable of dose data that is used to create the histogram
        binsize : int, optional
            Bin width size (in cGy) used to create the histogram
        """
        data = np.array(data)
        bins = np.arange(0, data.max() + 1, binsize)
        if bins.size == 1:
            bins = np.array([0, data.max()])
        if data.max() not in bins:
            bins = np.append(bins, data.max())
        counts, bins = np.histogram(data, bins)

        return cls(counts, bins)
Project: CSB    Author: csb-toolbox
def density(x, nbins, normalize=True):
    """
    Histogram of univariate input data: basically calls numpy's histogram method and
    does a proper normalization.

    @param x: input numpy array
    @param nbins: number of bins
    @type nbins: integer
    @param normalize: if true, histogram will be normalized
    """
    from numpy import histogram

    hy, hx = histogram(x, nbins)
    hx = 0.5 * (hx[1:] + hx[:-1])
    hy = hy.astype('d')
    if normalize:
        hy /= (hx[1] - hx[0]) * hy.sum()

    return hx, hy
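With normalize=True the returned hy is a proper density over the bin centers hx, so a trapezoidal integral should come out close to one; a quick sanity check using the function above:

import numpy as np

x = np.random.normal(size=10000)
hx, hy = density(x, nbins=50)
print(np.trapz(hy, hx))  # ~1.0, up to discretization error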
Project: tensorboard_logger    Author: TeamHG-Memex
def log_histogram(self, name, value, step=None):
        """Log a histogram for given name on given step.

        Args:
            name (str): name of the variable (it will be converted to a valid
                tensorflow summary name).
            value (tuple or list): either list of numbers
                to be summarized as a histogram, or a tuple of bin_edges and
                bincounts that directly define a histogram.
            step (int): non-negative integer used for visualization
        """
        if isinstance(value, six.string_types):
            raise TypeError('"value" should be a number, got {}'
                            .format(type(value)))

        self._check_step(step)
        tf_name = self._ensure_tf_name(name)

        summary = self._histogram_summary(tf_name, value, step=step)
        self._log_summary(tf_name, summary, value, step=step)
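Typical usage, assuming the package's Logger is constructed with a log directory (a sketch, not verified against a specific tensorboard_logger version):

from tensorboard_logger import Logger

logger = Logger('runs/example')  # event files are written under this directory
logger.log_histogram('losses', [0.9, 0.8, 0.8, 0.5], step=1)  # a plain list of numbers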
Project: tbp-next-basket    Author: GiulioRossetti
def estimate_basket_length(baskets):
    basket_lengths = list()
    basket_ids = baskets['data']

    for basket_id in basket_ids:
        basket = baskets['data'][basket_id]['basket']

        basket_len = len(basket)
        basket_lengths.append(basket_len)

    if len(basket_lengths) <= 10:
        return int(np.round(np.median(basket_lengths)))

    nbr_bins = int(np.round(estimate_nbr_bins(basket_lengths)))  # np.histogram requires an integer bin count
    val, bins = np.histogram(basket_lengths, bins=nbr_bins)
    ebl = int(np.round(bins[np.argmax(val)]))
    ebl = ebl + 1 if ebl == 1 else ebl

    return ebl
Project: tbp-next-basket    Author: GiulioRossetti
def estimate_month_basket_length(baskets):
    month_basket_lenght = [[] for _ in range(12)]  # xrange is Python 2-only

    basket_ids = baskets['data']

    for basket_id in basket_ids:
        date_object = datetime.datetime.strptime(basket_id[0:10], '%Y_%m_%d')
        basket = baskets['data'][basket_id]['basket']
        month_id = date_object.month - 1

        basket_len = len(basket)
        month_basket_lenght[month_id].append(basket_len)

    month_ebl = list()
    for month_id in range(12):
        nbr_bins = estimate_nbr_bins(month_basket_lenght[month_id])
        nbr_bins = int(np.round(nbr_bins))  # np.histogram requires an integer bin count
        val, bins = np.histogram(month_basket_lenght[month_id], bins=nbr_bins)
        mebl = int(np.round(bins[np.argmax(val)]))
        mebl = mebl + 1 if mebl == 1 else mebl
        month_ebl.append(mebl)

    return month_ebl
Project: dataset-shift-osdc16    Author: pprett
def generate_data(sample_size=200, pd=[[0.4, 0.4], [0.1, 0.1]]):
    pd = np.array(pd)
    pd /= pd.sum()
    offset = 50
    bins = np.r_[np.zeros((1,)), np.cumsum(pd)]
    bin_counts = np.histogram(np.random.rand(sample_size), bins)[0]
    data = np.empty((0, 2))
    targets = []
    for ((i, j), p), count in zip(np.ndenumerate(pd), bin_counts):
        xs = np.random.uniform(low=0.0, high=50.0, size=count) + j * offset
        ys = np.random.uniform(low=0.0, high=50.0, size=count) + -i * offset
        data = np.vstack((data, np.c_[xs, ys]))
        if i == j:
            targets.extend([1] * count)
        else:
            targets.extend([-1] * count)
    return np.c_[data, targets]
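The np.histogram(np.random.rand(sample_size), bins) line above is a compact way to draw multinomially distributed counts: the bin edges are cumulative class probabilities, so a uniform sample lands in class k with probability pd[k]. Equivalently:

import numpy as np

pd = np.array([0.4, 0.4, 0.1, 0.1])
edges = np.r_[0.0, np.cumsum(pd)]
counts = np.histogram(np.random.rand(1000), edges)[0]  # same distribution as the line below
counts2 = np.random.multinomial(1000, pd)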
Project: nelpy    Author: nelpy
def get_mode_pth_from_array(posterior, tuningcurve=None):
    """If tuningcurve is provided, then we map it back to the external coordinates / units.
    Otherwise, we stay in the bin space."""
    n_xbins = posterior.shape[0]

    if tuningcurve is None:
        xmin = 0
        xmax = n_xbins
    else:
        # TODO: this only works for TuningCurve1D currently
        if isinstance(tuningcurve, auxiliary.TuningCurve1D):
            xmin = tuningcurve.bins[0]
            xmax = tuningcurve.bins[-1]
        else:
            raise TypeError("tuningcurve type not yet supported!")

    _, bins = np.histogram([], bins=n_xbins, range=(xmin,xmax))
    xbins = (bins + xmax/n_xbins)[:-1]

    mode_pth = np.argmax(posterior, axis=0)*xmax/n_xbins
    mode_pth = np.where(np.isnan(posterior.sum(axis=0)), np.nan, mode_pth)

    return mode_pth
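The np.histogram([], ...) call above is a small trick for computing evenly spaced bin edges without histogramming any data; the counts come back as zeros and only the edges are used:

import numpy as np

counts, edges = np.histogram([], bins=4, range=(0.0, 1.0))
print(counts)  # [0 0 0 0]
print(edges)   # [0.   0.25 0.5  0.75 1.  ]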
Project: nelpy    Author: nelpy
def get_mean_pth_from_array(posterior, tuningcurve=None):
    """If tuningcurve is provided, then we map it back to the external coordinates / units.
    Otherwise, we stay in the bin space."""
    n_xbins = posterior.shape[0]

    if tuningcurve is None:
        xmin = 0
        xmax = 1
    else:
        # TODO: this only works for TuningCurve1D currently
        if isinstance(tuningcurve, auxiliary.TuningCurve1D):
            xmin = tuningcurve.bins[0]
            xmax = tuningcurve.bins[-1]
        else:
            raise TypeError("tuningcurve type not yet supported!")

    _, bins = np.histogram([], bins=n_xbins, range=(xmin,xmax))
    xbins = (bins + xmax/n_xbins)[:-1]

    mean_pth = (xbins * posterior.T).sum(axis=1)

    return mean_pth
Project: SecuML    Author: ANSSI-FR
def generateHistogram(self):
        # 10 equal-width bins computed on all the data
        if not self.has_true_labels:
            hist, bin_edges = np.histogram(self.plot_datasets['all'].values, bins = 10, density = False)
        else:
            hist, bin_edges = np.histogram(self.plot_datasets['malicious'].values, bins = 10, density = False)
        x_labels = [str(bin_edges[e]) + ' - ' + str(bin_edges[e+1]) for e in range(len(bin_edges)-1)]
        barplot = BarPlot(x_labels)
        for label, dataset in self.plot_datasets.items():  # iteritems() is Python 2-only
            hist, bin_edges = np.histogram(dataset.values, bins = bin_edges, density = False)
            hist_dataset = PlotDataset(hist, dataset.label)
            hist_dataset.setColor(dataset.color)
            barplot.addDataset(hist_dataset)
        output_filename = self.output_directory + 'histogram.json'
        with open(output_filename, 'w') as f:
            barplot.exportJson(f)
Project: antgo    Author: jianzfb
def reorganize_histogram_data(self, data):
    data_x, data_y = data
    try:
      data_x = float(data_x)
    except Exception:  # a bare except would also swallow KeyboardInterrupt
      logger.error("Channel X Must be Scalar Data")

    try:
      data_y = data_y.flatten()
      bins = 10 # default bins
      if "BINS" in self.params:
        bins = self.params['BINS']

      data_y = np.histogram(data_y, bins)
    except Exception:
      logger.error("Channel Y Must be Numpy Array")
    return (data_x, data_y)
Project: pyjoyplot    Author: neal-o-r
def plot(data=None, x=None, y=None, hue=None, kind='line', 
        offset=0.75, cmap='Dark2', smooth=1, order=None, bins=10, weights=None, figsize=None):
    '''
    Create 'Joy Plot':
        data (pd.DataFrame): DataFrame holding all data
        x (str)  : DataFrame column to use as x value
        y (str)  : DataFrame column to use as y values 
        hue (str): DataFrame column to use to group data
        kind (str): specify plot type; line or hist
        offset (int/float): vertical separation between plots
        cmap (str/list): name of matplotlib cmap, or list
                 of colors to be used for plots
        smooth (int): smoothing window, if smoothing to be applied
        order (list): order of categories - top to bottom
        bins (int/list): bins if using hist. int for all hists to have same bins
                 else list of bin no. for each hist
        weights (boolean/list): should the histogram be weighted?

    '''

    plotter = _pyjoyplotter(data=data, x=x, y=y, hue=hue,
            offset=offset, cmap=cmap, smooth=smooth, kind=kind,
            order=order, bins=bins, weights=weights, figsize=figsize)
    return plotter._plot()
Project: pyku    Author: dubvulture
def classify(self, image):
        """
        Given a 28x28 image, returns an array representing the 2 highest
        probable prediction
        :param image:
        :return: array of 2 highest prob-digit tuples
        """
        if cv2.__version__[0] == '2':
            res = self.model.find_nearest(np.array([self.feature(image)]), k=11)
        else:
            res = self.model.findNearest(np.array([self.feature(image)]), k=11)
        hist = np.histogram(res[2], bins=9, range=(1, 10), density=True)[0]  # 'normed' was removed in NumPy 1.24
        zipped = sorted(zip(hist, np.arange(1, 10)), reverse=True)
        return np.array(zipped[:2])
Project: nanoQC    Author: wdecoster
def length_histogram(fqin, name):
    '''
    Create a histogram, and return the bin edges of the bin containing the most reads
    '''
    logging.info("Creating length histogram to find bin with most reads.")
    lengths = get_lengths(fqin)
    plt.hist(lengths, bins='auto')
    plt.savefig(name, format='png', dpi=100)
    plt.close("all")
    hist, bin_edges = np.histogram(lengths, bins='auto')
    maxindex = np.argmax(hist)
    return (bin_edges[maxindex], bin_edges[maxindex + 1])
Project: npstreams    Author: LaurentRDC
def test_against_numpy(self):
        source = [np.random.random((16, 12, 5)) for _ in range(10)]
        stack = np.stack(source, axis = -1)

        bins = np.linspace(0, 1, num = 10)
        from_numpy = np.histogram(stack, bins = bins)[0]
        from_ihistogram = last(ihistogram(source, bins = bins))

        # Since histogram output is int, cannot use allclose
        self.assertTrue(np.all(np.equal(from_numpy, from_ihistogram)))
Project: npstreams    Author: LaurentRDC
def ihistogram(arrays, bins):
    """
    Streaming histogram calculation.

    Parameters
    ----------
    arrays : iterable of ndarrays
        Arrays to be combined. This iterable can also be a generator. Arrays in this stream
        can be of any shape; the histogram is computed over the flattened array.
    bins : iterable
        Bin edges, including the rightmost edge, allowing for non-uniform bin widths.

    Yields
    ------
    hist : `~numpy.ndarray`
        Streamed histogram.

    See Also
    --------
    numpy.histogram : 1D histogram of dense arrays.
    """
    # TODO: weights
    bins = np.asarray(bins)
    arrays = iter(arrays)  # ensure next() below also works for plain sequences such as lists

    # np.histogram also returns the bin edges, which we ignore
    hist_func = lambda arr: np.histogram(arr, bins = bins)[0]
    hist = hist_func(next(arrays))
    yield hist

    for arr in arrays:
        hist += hist_func(arr)
        yield hist
Project: Deep360Pilot-optical-flow    Author: yenchenlin
def gradient_histogram(flow_img, binsize=12):
    """ calculate histogram """
    assert len(flow_img.shape) == 3, "Wrong flow image."

    # NOTE the frame is in RGB, while cv2 is in BGR, so do REMEMBER to reverse it.
    img_mag, img_v, img_u = np.split(flow_img, 3, 2)

    # NOTE the role reversal: the "y-coordinate" is the first function parameter, the "x-coordinate" is the second.
    # NOTE that we use same axis configure as image axis(x is larger to the right, y is larger to the bottom),
    # so add a minus sign before img_v, to make the two axis align.
    orientation = np.arctan2(-img_v, img_u)

    # Original result not applicable
    # Directly use full 360 degree
    new_orient = orientation

    # Prune zero motion
    _mag_greater_zero = img_mag > 0.0
    pruned_orient = new_orient[_mag_greater_zero]

    # Histogram of optical flow
    hofbins = np.arange(-math.pi, math.pi+1e-6, 2*math.pi/binsize)
    hist, bin_edges = np.histogram(pruned_orient.flatten(), bins=hofbins)  # density=True could be passed instead

    # Normalize
    hist = hist.astype(np.float32) / (np.sum(_mag_greater_zero) + 1e-6)

    return hist, bin_edges