Python pandas 模块,Int64Index() 实例源码

我们从Python开源项目中,提取了以下14个代码示例,用于说明如何使用pandas.Int64Index()

项目:zipline-chinese    作者:zhanghan1990    | 项目源码 | 文件源码
def __init__(self, data, **kwargs):
    """Set up the source from a price DataFrame.

    Parameters
    ----------
    data : pd.DataFrame
        Asserted to have a DatetimeIndex as its index and an Int64Index
        (integer SIDs) as its columns.
    **kwargs
        ``start`` and ``end`` may be supplied; when absent they default to
        the first and last labels of the forward-filled frame's index.
        All keyword arguments are also forwarded to ``hash_args``.
    """
    assert isinstance(data.index, pd.tseries.index.DatetimeIndex)
    # The columns of the DataFrame must be integer SIDs.
    assert isinstance(data.columns, pd.Int64Index)

    # TODO is ffilling correct/necessary?
    # Prices are forward filled before being stored.
    filled = data.fillna(method='ffill')
    self.data = filled

    # Pull the optional window bounds out of the config, falling back to
    # the extent of the data.
    first_label, last_label = filled.index[0], filled.index[-1]
    self.start = kwargs.get('start', first_label)
    self.end = kwargs.get('end', last_label)
    self.sids = filled.columns

    # Hash value computed from the raw arguments (per the original comment,
    # used for downstream sorting).
    self.arg_string = hash_args(data, **kwargs)

    self._raw_data = None
    self.started_sids = set()
项目:zipline-chinese    作者:zhanghan1990    | 项目源码 | 文件源码
def __init__(self, data, **kwargs):
    """Set up the source from a Panel of per-SID data.

    Parameters
    ----------
    data : pd.Panel
        Asserted to have a DatetimeIndex as its major axis and an
        Int64Index (integer SIDs) as its items.
    **kwargs
        ``start`` and ``end`` may be supplied; when absent they default to
        the first and last labels of the stored panel's major axis.
        All keyword arguments are also forwarded to ``hash_args``.
    """
    assert isinstance(data.major_axis, pd.tseries.index.DatetimeIndex)
    # The items of the Panel must be integer SIDs.
    assert isinstance(data.items, pd.Int64Index)

    # TODO is ffilling correct/necessary?
    # Missing values are replaced using the mapping {'volume': 0}.
    filled = data.fillna(value={'volume': 0})
    self.data = filled

    # Pull the optional window bounds out of the config, falling back to
    # the extent of the data.
    first_label, last_label = filled.major_axis[0], filled.major_axis[-1]
    self.start = kwargs.get('start', first_label)
    self.end = kwargs.get('end', last_label)
    self.sids = filled.items

    # Hash value computed from the raw arguments (per the original comment,
    # used for downstream sorting).
    self.arg_string = hash_args(data, **kwargs)

    self._raw_data = None
    self.started_sids = set()
项目:catalyst    作者:enigmampc    | 项目源码 | 文件源码
def _dt_to_epoch_ns(dt_series):
    """Return the int64 view of a timeseries after normalising it to UTC.

    Parameters
    ----------
    dt_series : pd.Series
        The timeseries to convert; only its ``values`` are used.

    Returns
    -------
    idx : pd.Int64Index
        Result of viewing the UTC datetime index as ``np.int64``
        (nanoseconds since the epoch according to the original
        documentation; the concrete container type depends on pandas).
    """
    stamps = pd.to_datetime(dt_series.values)
    # Naive timestamps are declared to be UTC; aware ones are converted.
    to_utc = stamps.tz_localize if stamps.tzinfo is None else stamps.tz_convert
    return to_utc('UTC').view(np.int64)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_abc_types(self):
    """Each concrete pandas object must be an instance of its ABC marker."""
    check = self.assertIsInstance

    # Index flavours against their specific markers.
    check(pd.Index(['a', 'b', 'c']), com.ABCIndex)
    check(pd.Int64Index([1, 2, 3]), com.ABCInt64Index)
    check(pd.Float64Index([1, 2, 3]), com.ABCFloat64Index)
    check(self.multi_index, com.ABCMultiIndex)
    check(self.datetime_index, com.ABCDatetimeIndex)
    check(self.timedelta_index, com.ABCTimedeltaIndex)
    check(self.period_index, com.ABCPeriodIndex)
    check(self.categorical_df.index, com.ABCCategoricalIndex)

    # Any index also satisfies the generic index-class marker.
    check(pd.Index(['a', 'b', 'c']), com.ABCIndexClass)
    check(pd.Int64Index([1, 2, 3]), com.ABCIndexClass)

    # Containers, sparse structures, categoricals and scalars.
    check(pd.Series([1, 2, 3]), com.ABCSeries)
    check(self.df, com.ABCDataFrame)
    check(self.df.to_panel(), com.ABCPanel)
    check(self.sparse_series, com.ABCSparseSeries)
    check(self.sparse_array, com.ABCSparseArray)
    check(self.categorical, com.ABCCategorical)
    check(pd.Period('2012', freq='A-DEC'), com.ABCPeriod)
项目:psst    作者:power-system-simulation-toolbox    | 项目源码 | 文件源码
def setattributeindex(self, instance, value):
    """Replace the bus index with ``value`` and relabel dependent tables.

    Bus references in ``instance.branch`` (``F_BUS``, ``T_BUS``) and
    ``instance.gen`` (``GEN_BUS``) are mapped from the old bus label to the
    entry of ``value`` at the same position. The load columns are relabelled
    too, and integer-style bus names are finally turned into strings.

    Parameters
    ----------
    instance : object
        Case object exposing ``bus``, ``branch``, ``gen``, ``load`` and
        ``bus_name``.
    value : sequence
        New bus labels, positionally aligned with the current bus index.
    """
    bus_name = instance.bus.index

    def relabel(old_label):
        # Position of the old label in the current bus index picks the new one.
        return value[bus_name.get_loc(old_label)]

    for table_name, column in (('branch', 'F_BUS'),
                               ('branch', 'T_BUS'),
                               ('gen', 'GEN_BUS')):
        table = getattr(instance, table_name)
        table[column] = table[column].apply(relabel)

    try:
        # Keep only the new labels whose bus currently has a load column.
        present = instance.bus_name.isin(instance.load.columns)
        instance.load.columns = [
            label for flag, label in zip(present, value)
            if flag == True  # noqa: E712 - comparison kept verbatim
        ]
    except ValueError:
        instance.load.columns = value
    except AttributeError:
        # Attribute lookup failed above: install a one-row frame of zeros.
        instance.load = pd.DataFrame(0, index=range(0, 1), columns=value, dtype='float')

    instance.bus.index = value

    current = instance.bus_name
    # `or` is kept (rather than a tuple of types) so pd.Int64Index is only
    # looked up when the RangeIndex test fails.
    if isinstance(current, pd.RangeIndex) or isinstance(current, pd.Int64Index):
        logger.debug('Forcing string types for all bus names')
        instance.bus_name = ['Bus{}'.format(b) for b in current]
项目:zipline-chinese    作者:zhanghan1990    | 项目源码 | 文件源码
def setUpClass(cls):
    """Build the shared calendar, asset ids, asset finder and lifetimes mask.

    The finder context manager is entered here and kept on the class; no
    matching exit is performed in this method.
    """
    calendar = date_range('2014', '2015', freq=trading_day)
    cls.__calendar = calendar

    cls.__assets = assets = Int64Index(arange(1, 20))

    # Every asset gets the calendar's first and last sessions as its bounds.
    equity_info = make_simple_equity_info(assets, calendar[0], calendar[-1])
    finder_ctx = tmp_asset_finder(equities=equity_info)
    cls.__tmp_finder_ctx = finder_ctx

    finder = finder_ctx.__enter__()
    cls.__finder = finder

    # The mask covers only the final 30 calendar entries.
    cls.__mask = finder.lifetimes(calendar[-30:], include_start_date=False)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_outer_join_sort(self):
    """Outer-joining an integer index with a date index warns and unions."""
    int_idx = Index(np.random.permutation(15))
    date_idx = tm.makeDateIndex(10)

    with tm.assert_produces_warning(RuntimeWarning):
        joined = int_idx.join(date_idx, how='outer')

    # The date index comes first in the expected union because
    # DatetimeIndex has join precedence over Int64Index.
    with tm.assert_produces_warning(RuntimeWarning):
        dates_as_objects = date_idx.astype(object)
        expected = dates_as_objects.union(int_idx.astype(object))

    tm.assert_index_equal(joined, expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self):
    """Reindexing against an empty target takes the target's dtype (GH7774)."""
    source = pd.Index(list('abc'))
    expect_equal = self.assertEqual

    def reindexed_scalar_type(target):
        # reindex returns a tuple; its first element is the new index.
        new_index = source.reindex(target)[0]
        return new_index.dtype.type

    expect_equal(reindexed_scalar_type(pd.Int64Index([])), np.int64)
    expect_equal(reindexed_scalar_type(pd.Float64Index([])), np.float64)
    expect_equal(reindexed_scalar_type(pd.DatetimeIndex([])), np.datetime64)

    # An empty two-level MultiIndex target keeps the dtype of each level.
    empty_multi = pd.MultiIndex(
        [pd.Int64Index([]), pd.Float64Index([])], [[], []])
    reindexed = source.reindex(empty_multi)[0]
    expect_equal(reindexed.levels[0].dtype.type, np.int64)
    expect_equal(reindexed.levels[1].dtype.type, np.float64)
项目:psst    作者:power-system-simulation-toolbox    | 项目源码 | 文件源码
def setattributeindex(self, instance, value):
    """Replace the generator index with ``value`` and normalise gen names.

    Both ``instance.gen`` and ``instance.gencost`` receive ``value`` as
    their index. If the generator names are then an integer-style index
    (``RangeIndex`` or ``Int64Index``), they are replaced by the strings
    ``'GenCo<n>'``.

    Parameters
    ----------
    instance : object
        Case object exposing ``gen``, ``gencost`` and ``gen_name``.
    value : sequence
        New generator labels.
    """
    instance.gen.index = value
    instance.gencost.index = value

    gen_name = instance.gen_name
    # Both type checks must look at the generator names. The Int64Index
    # check used to inspect instance.bus_name, so the type of the bus names
    # decided whether generators were renamed.
    # `or` is kept (rather than a tuple of types) so pd.Int64Index is only
    # looked up when the RangeIndex test fails.
    if isinstance(gen_name, pd.RangeIndex) or isinstance(gen_name, pd.Int64Index):
        instance.gen_name = ['GenCo{}'.format(g) for g in gen_name]
项目:pyflux    作者:RJT1990    | 项目源码 | 文件源码
def shift_dates(self, h):
    """ Auxiliary function for creating dates for forecasts

    Works on a deep copy of ``self.index`` with the first ``self.max_lag``
    entries dropped, so the stored index itself is left untouched.

    Parameters
    ----------
    h : int
        How many steps to forecast

    Returns
    ----------
    A transformed date_index object
    """
    date_index = copy.deepcopy(self.index)[self.max_lag:]

    if self.is_pandas is not True:
        # Non-pandas index: append "last value + 1" once per step.
        for _ in range(h):
            date_index.append(date_index[-1] + 1)
        return date_index

    if isinstance(date_index, pd.core.indexes.datetimes.DatetimeIndex):
        # 'H', 'M' and 'S' use the seconds of the last gap; any other
        # inferred frequency uses its days.
        use_seconds = pd.infer_freq(date_index) in ['H', 'M', 'S']
        for _ in range(h):
            size = len(date_index)
            # Explicit size-based positions are kept on purpose: size - 2
            # is not the same as -2 for a one-element index.
            gap = date_index[size - 1] - date_index[size - 2]
            step = gap.seconds if use_seconds else gap.days
            date_index += pd.DateOffset(step)
    elif isinstance(date_index, pd.core.indexes.numeric.Int64Index):
        # Integer index: repeat the last observed difference once per step.
        for _ in range(h):
            raw = date_index.values
            size = len(raw)
            new_value = raw[size - 1] + (raw[size - 1] - raw[size - 2])
            date_index = pd.Int64Index(np.append(raw, new_value))

    return date_index
项目:catalyst    作者:enigmampc    | 项目源码 | 文件源码
def get_adjustments(self,
                    zero_qtr_data,
                    requested_qtr_data,
                    last_per_qtr,
                    dates,
                    assets,
                    columns,
                    **kwargs):
    """
    Collect the adjustments for the given estimates data over the given
    dates.

    Note that ``zero_qtr_data`` is sorted by index in place.

    Parameters
    ----------
    zero_qtr_data : pd.DataFrame
        The 'time zero' data for each calendar date per sid.
    requested_qtr_data : pd.DataFrame
        The requested quarter data for each calendar date per sid.
    last_per_qtr : pd.DataFrame
        A DataFrame with a column MultiIndex of [self.estimates.columns,
        normalized_quarters, sid] that allows easily getting the timeline
        of estimates for a particular sid for a particular quarter.
    dates : pd.DatetimeIndex
        The calendar dates for which estimates data is requested.
    assets : pd.Int64Index
        An index of all the assets from the raw data.
    columns : list of BoundColumn
        The columns for which adjustments need to be calculated.
    kwargs :
        Additional keyword arguments that are forwarded to
        `get_adjustments_for_sid` for use in computing adjustments for
        each sid.

    Returns
    -------
    col_to_all_adjustments : dict[int -> AdjustedArray]
        A dictionary of all adjustments that should be applied. It is
        created empty here and handed to `get_adjustments_for_sid`.
    """
    zero_qtr_data.sort_index(inplace=True)

    # Keep only the LAST record of every (sid, quarter) group so that the
    # most up-to-date event date is used if the event date changed.
    by_sid_and_quarter = zero_qtr_data.groupby(
        level=[SID_FIELD_NAME, NORMALIZED_QUARTERS]
    )
    quarter_shifts = by_sid_and_quarter.nth(-1)

    # Position of each sid within `assets`.
    sid_to_idx = {sid: position for position, sid in enumerate(assets)}
    col_to_all_adjustments = {}

    per_sid = quarter_shifts.groupby(level=SID_FIELD_NAME)
    per_sid.apply(
        self.get_adjustments_for_sid,
        dates,
        requested_qtr_data,
        last_per_qtr,
        sid_to_idx,
        columns,
        col_to_all_adjustments,
        **kwargs
    )
    return col_to_all_adjustments
项目:catalyst    作者:enigmampc    | 项目源码 | 文件源码
def test_categorical_df_concat(self):
    """Concatenating categorical frames keeps values, dtypes and categories."""

    def make_frame(a_values, b_values, c_values):
        # 'A' and 'C' are categorical columns, 'B' is int64.
        return pd.DataFrame(
            {
                'A': pd.Series(a_values, dtype='category'),
                'B': pd.Series(b_values, dtype='int64'),
                'C': pd.Series(c_values, dtype='category'),
            }
        )

    inp = [
        make_frame(['a', 'b', 'c'], [100, 102, 103], ['x', 'x', 'x']),
        make_frame(['c', 'b', 'd'], [103, 102, 104], ['y', 'y', 'y']),
        make_frame(['a', 'b', 'd'], [101, 102, 104], ['z', 'z', 'z']),
    ]
    result = categorical_df_concat(inp)

    expected = make_frame(
        ['a', 'b', 'c', 'c', 'b', 'd', 'a', 'b', 'd'],
        [100, 102, 103, 103, 102, 104, 101, 102, 104],
        ['x', 'x', 'x', 'y', 'y', 'y', 'z', 'z', 'z'],
    )
    # Each input frame contributes its own 0..2 row labels.
    expected.index = pd.Int64Index([0, 1, 2, 0, 1, 2, 0, 1, 2])
    assert_equal(expected, result)

    for column in ('A', 'C'):
        assert_equal(
            expected[column].cat.categories,
            result[column].cat.categories
        )
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def infer_freq(index, warn=True):
    """
    Infer the most likely frequency given the input index. If the frequency is
    uncertain, a warning will be printed.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
      if passed a Series will use the values of the series (NOT THE INDEX)
    warn : boolean, default True
      forwarded to the frequency inferer

    Returns
    -------
    freq : string or None
        None if no discernible frequency

    Raises
    ------
    TypeError
        if the index is not datetime-like (non-convertible Series dtype,
        period-like input, Int64Index or Float64Index)
    ValueError
        if there are less than three values (per the original
        documentation; not raised directly in this function)
    """
    import pandas as pd

    if isinstance(index, com.ABCSeries):
        values = index._values
        convertible = (com.is_datetime64_dtype(values) or
                       com.is_timedelta64_dtype(values) or
                       values.dtype == object)
        if not convertible:
            raise TypeError("cannot infer freq from a non-convertible "
                            "dtype on a Series of {0}".format(index.dtype))
        index = values

    if com.is_period_arraylike(index):
        raise TypeError("PeriodIndex given. Check the `freq` attribute "
                        "instead of using infer_freq.")
    if isinstance(index, pd.TimedeltaIndex):
        return _TimedeltaFrequencyInferer(index, warn=warn).get_freq()

    is_datetime_index = isinstance(index, pd.DatetimeIndex)
    if isinstance(index, pd.Index) and not is_datetime_index:
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError("cannot infer freq from a non-convertible index "
                            "type {0}".format(type(index)))
        # Any other Index is reduced to its raw values and converted below.
        index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            # Retry from the input's ``asi8`` attribute.
            index = pd.DatetimeIndex(index.asi8)

    return _FrequencyInferer(index, warn=warn).get_freq()
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_constructor_dtypes(self):
    """Index() must pick the index subclass implied by data and dtype."""
    is_instance = self.assertIsInstance
    not_instance = self.assertNotIsInstance
    equal = self.assertEqual

    # Integer data, with or without an explicit integer dtype.
    integer_cases = [
        Index(np.array([1, 2, 3], dtype=int)),
        Index(np.array([1, 2, 3], dtype=int), dtype=int),
        Index([1, 2, 3], dtype=int),
    ]
    for idx in integer_cases:
        is_instance(idx, Int64Index)

    # these should coerce
    coerced_cases = [
        Index(np.array([1., 2., 3.], dtype=float), dtype=int),
        Index([1., 2., 3.], dtype=int),
    ]
    for idx in coerced_cases:
        is_instance(idx, Int64Index)

    float_cases = [
        Index(np.array([1., 2., 3.], dtype=float)),
        Index(np.array([1, 2, 3], dtype=int), dtype=float),
        Index(np.array([1., 2., 3.], dtype=float), dtype=float),
        Index([1, 2, 3], dtype=float),
        Index([1., 2., 3.], dtype=float),
    ]
    for idx in float_cases:
        is_instance(idx, Float64Index)

    # Booleans are expected as a plain object-dtype Index.
    bool_cases = [
        Index(np.array([True, False, True], dtype=bool)),
        Index([True, False, True]),
        Index(np.array([True, False, True], dtype=bool), dtype=bool),
        Index([True, False, True], dtype=bool),
    ]
    for idx in bool_cases:
        is_instance(idx, Index)
        equal(idx.dtype, object)

    category_cases = [
        Index(np.array([1, 2, 3], dtype=int), dtype='category'),
        Index([1, 2, 3], dtype='category'),
        Index(np.array([np.datetime64('2011-01-01'),
                        np.datetime64('2011-01-02')]), dtype='category'),
        Index([datetime(2011, 1, 1), datetime(2011, 1, 2)],
              dtype='category'),
    ]
    for idx in category_cases:
        is_instance(idx, CategoricalIndex)

    datetime_cases = [
        Index(np.array([np.datetime64('2011-01-01'),
                        np.datetime64('2011-01-02')])),
        Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]),
    ]
    for idx in datetime_cases:
        is_instance(idx, DatetimeIndex)

    # An explicit object dtype must suppress the DatetimeIndex inference.
    datetime_object_cases = [
        Index(np.array([np.datetime64('2011-01-01'),
                        np.datetime64('2011-01-02')]), dtype=object),
        Index([datetime(2011, 1, 1),
               datetime(2011, 1, 2)], dtype=object),
    ]
    for idx in datetime_object_cases:
        not_instance(idx, DatetimeIndex)
        is_instance(idx, Index)
        equal(idx.dtype, object)

    timedelta_cases = [
        Index(np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')])),
        Index([timedelta(1), timedelta(1)]),
    ]
    for idx in timedelta_cases:
        is_instance(idx, TimedeltaIndex)

    # An explicit object dtype must suppress the TimedeltaIndex inference.
    timedelta_object_cases = [
        Index(np.array([np.timedelta64(1, 'D'),
                        np.timedelta64(1, 'D')]), dtype=object),
        Index([timedelta(1), timedelta(1)], dtype=object),
    ]
    for idx in timedelta_object_cases:
        not_instance(idx, TimedeltaIndex)
        is_instance(idx, Index)
        equal(idx.dtype, object)