Python pandas module: to_timedelta() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use pandas.to_timedelta().
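Before diving into the project excerpts, here is a minimal usage sketch (written against a recent pandas; exact reprs vary slightly across versions):

import numpy as np
import pandas as pd

# Strings, numbers with a unit, and array-likes all convert to timedelta64[ns]
print(pd.to_timedelta('1 days 06:05:01.000030'))  # Timedelta('1 days 06:05:01.000030')
print(pd.to_timedelta(15.5, unit='s'))            # Timedelta('0 days 00:00:15.500000')
print(pd.to_timedelta(np.arange(3), unit='D'))    # TimedeltaIndex(['0 days', '1 days', '2 days'])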

Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def _wrap_result(self, result, block=None, obj=None):
        """ wrap a single result """

        if obj is None:
            obj = self._selected_obj
        if isinstance(result, np.ndarray):

            # coerce if necessary
            if block is not None:
                if com.is_timedelta64_dtype(block.values.dtype):
                    result = pd.to_timedelta(
                        result.ravel(), unit='ns').values.reshape(result.shape)

            if result.ndim == 1:
                from pandas import Series
                return Series(result, obj.index, name=obj.name)

            return type(obj)(result, index=obj.index, columns=block.columns)
        return result
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_to_timedelta_invalid(self):

        # these will error
        self.assertRaises(ValueError, lambda: to_timedelta([1, 2], unit='foo'))
        self.assertRaises(ValueError, lambda: to_timedelta(1, unit='foo'))

        # time not supported ATM
        self.assertRaises(ValueError, lambda: to_timedelta(time(second=1)))
        self.assertTrue(to_timedelta(
            time(second=1), errors='coerce') is pd.NaT)

        self.assertRaises(ValueError, lambda: to_timedelta(['foo', 'bar']))
        tm.assert_index_equal(TimedeltaIndex([pd.NaT, pd.NaT]),
                              to_timedelta(['foo', 'bar'], errors='coerce'))

        tm.assert_index_equal(TimedeltaIndex(['1 day', pd.NaT, '1 min']),
                              to_timedelta(['1 day', 'bar', '1 min'],
                                           errors='coerce'))
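
For reference, a minimal sketch of the errors= behavior the test above exercises, on a recent pandas (output formatting varies by version):

import pandas as pd

# errors='raise' (the default) fails on unparsable input;
# errors='coerce' turns failures into NaT instead of raising.
print(pd.to_timedelta(['1 day', 'bar', '1 min'], errors='coerce'))
# TimedeltaIndex(['1 days', NaT, '0 days 00:01:00'], dtype='timedelta64[ns]', freq=None)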
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_to_timedelta_on_missing_values(self):
        # GH5438
        timedelta_NaT = np.timedelta64('NaT')

        actual = pd.to_timedelta(Series(['00:00:01', np.nan]))
        expected = Series([np.timedelta64(1000000000, 'ns'),
                           timedelta_NaT], dtype='<m8[ns]')
        assert_series_equal(actual, expected)

        actual = pd.to_timedelta(Series(['00:00:01', pd.NaT]))
        assert_series_equal(actual, expected)

        actual = pd.to_timedelta(np.nan)
        self.assertEqual(actual.value, timedelta_NaT.astype('int64'))

        actual = pd.to_timedelta(pd.NaT)
        self.assertEqual(actual.value, timedelta_NaT.astype('int64'))
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_apply_to_timedelta(self):
        timedelta_NaT = pd.to_timedelta('NaT')

        list_of_valid_strings = ['00:00:01', '00:00:02']
        a = pd.to_timedelta(list_of_valid_strings)
        b = Series(list_of_valid_strings).apply(pd.to_timedelta)
        # Can't compare until apply on a Series gives the correct dtype
        # assert_series_equal(a, b)

        list_of_strings = ['00:00:01', np.nan, pd.NaT, timedelta_NaT]

        # TODO: unused?
        a = pd.to_timedelta(list_of_strings)  # noqa
        b = Series(list_of_strings).apply(pd.to_timedelta)  # noqa
        # Can't compare until apply on a Series gives the correct dtype
        # assert_series_equal(a, b)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_get_indexer(self):
        idx = pd.date_range('2000-01-01', periods=3)
        tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])

        target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours',
                                           '1 day 1 hour'])
        tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, 'backfill'), [0, 1, 2])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, 'nearest'), [0, 1, 1])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, 'nearest',
                            tolerance=pd.Timedelta('1 hour')),
            [0, -1, 1])
        with tm.assertRaises(ValueError):
            idx.get_indexer(idx[[0]], method='nearest', tolerance='foo')
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_get_loc(self):
        idx = pd.to_timedelta(['0 days', '1 days', '2 days'])

        for method in [None, 'pad', 'backfill', 'nearest']:
            self.assertEqual(idx.get_loc(idx[1], method), 1)
            self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1)
            self.assertEqual(idx.get_loc(str(idx[1]), method), 1)

        self.assertEqual(
            idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1)
        self.assertEqual(
            idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1)
        self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1)

        with tm.assertRaisesRegexp(ValueError, 'must be convertible'):
            idx.get_loc(idx[1], method='nearest', tolerance='foo')

        for method, loc in [('pad', 1), ('backfill', 2), ('nearest', 1)]:
            self.assertEqual(idx.get_loc('1 day 1 hour', method), loc)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_cummax_timedelta64(self):
        s = pd.Series(pd.to_timedelta(['NaT',
                                       '2 min',
                                       'NaT',
                                       '1 min',
                                       'NaT',
                                       '3 min', ]))

        expected = pd.Series(pd.to_timedelta(['NaT',
                                              '2 min',
                                              'NaT',
                                              '2 min',
                                              'NaT',
                                              '3 min', ]))
        result = s.cummax(skipna=True)
        self.assert_series_equal(expected, result)

        expected = pd.Series(pd.to_timedelta(['NaT',
                                              '2 min',
                                              '2 min',
                                              '2 min',
                                              '2 min',
                                              '3 min', ]))
        result = s.cummax(skipna=False)
        self.assert_series_equal(expected, result)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_constructor_dict_timedelta_index(self):
        # GH #12169: resample category data with timedelta index
        # constructing a Series from a dict as data with a TimedeltaIndex as
        # index used to produce NaN in the resulting Series data
        expected = Series(
            data=['A', 'B', 'C'],
            index=pd.to_timedelta([0, 10, 20], unit='s')
        )

        result = Series(
            data={pd.to_timedelta(0, unit='s'): 'A',
                  pd.to_timedelta(10, unit='s'): 'B',
                  pd.to_timedelta(20, unit='s'): 'C'},
            index=pd.to_timedelta([0, 10, 20], unit='s')
        )
        # this should work
        assert_series_equal(result, expected)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_timedelta64_dtype_array_returned(self):
        # GH 9431
        expected = np.array([31200, 45678, 10000], dtype='m8[ns]')

        td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678])
        result = algos.unique(td_index)
        tm.assert_numpy_array_equal(result, expected)
        self.assertEqual(result.dtype, expected.dtype)

        s = pd.Series(td_index)
        result = algos.unique(s)
        tm.assert_numpy_array_equal(result, expected)
        self.assertEqual(result.dtype, expected.dtype)

        arr = s.values
        result = algos.unique(arr)
        tm.assert_numpy_array_equal(result, expected)
        self.assertEqual(result.dtype, expected.dtype)
Project: JDcontest | Author: zsyandjyhouse
def make_fea_set(sku_fea, user_fea, train_start_date, train_end_time, action_data):
    start_days = "2016-02-01"
    # generate action features over sliding windows ending at train_end_time
    actions = None
    for i in (1, 2, 3, 5, 7, 10, 15, 21, 30):
        start_time = train_end_time - pd.to_timedelta(str(i)+' days')
        if actions is None:
            actions = get_action_feat(start_time, train_end_time,action_data)
        else:
            actions = pd.merge(actions, get_action_feat(start_time, train_end_time,action_data), how='left',
                               on=['user_id', 'sku_id'])

    actions = pd.merge(actions, user_fea, how='left', on='user_id')
    actions = pd.merge(actions, sku_fea, how='left', on='sku_id')

    actions = actions.fillna(0)
    print('fea_weidu3', actions.shape)
    #actions.to_csv('test'+str(train_end_time).split(' ')[0]+'.csv')
    return actions
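
The window boundaries above are built by formatting an integer into a timedelta string; a small self-contained sketch of that idiom (the end date is hypothetical):

import pandas as pd

train_end_time = pd.Timestamp('2016-04-16')
for i in (1, 3, 7):
    start_time = train_end_time - pd.to_timedelta(str(i) + ' days')
    print(start_time)  # 2016-04-15, 2016-04-13, 2016-04-09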
Project: scikit-dataaccess | Author: MITHaystack
def output(self):
        ''' 
        Generate data wrapper for Mahali temperatures

        @return Mahali temperature data wrapper
        '''

        # Function to extract date from filename (only month/day/year, no hours/minutes/seconds)
        def toDateTime(in_filename):
            return pd.to_datetime(pd.to_datetime(in_filename[7:25]).strftime('%Y-%m-%d'))


        # Read in file list:
        mahali_temperature_info = resource_filename('skdaccess', os.path.join('support','mahali_temperature_info.txt'))
        filenames = pd.read_csv(mahali_temperature_info,header=None,
                                names=('station','filename'),
                                skipinitialspace=True)


        # Create a column of dates
        filenames['date'] = filenames['filename'].apply(toDateTime)

        # Need to grab day before as data can spill over
        adjusted_start_date = self.start_date - pd.to_timedelta('1d')
        adjusted_end_date = self.end_date + pd.to_timedelta('1d')


        station_list = self.ap_paramList[0]()
        # Get data for each selected station from one day before to one day after the requested dates
        index_to_retrieve = np.logical_and.reduce([filenames.loc[:, 'station'].apply(lambda x: x in station_list),
                                                   filenames.loc[:, 'date'] >= adjusted_start_date,
                                                   filenames.loc[:, 'date'] <= self.end_date])

        all_temperature_data = self.retrieveOnlineData(filenames[index_to_retrieve])

        # Due to data spillover, trim each DataFrame in the dictionary to the adjusted date range
        for station in all_temperature_data.keys():
            all_temperature_data[station] = all_temperature_data[station].loc[adjusted_start_date:adjusted_end_date]

        # Return table wrapper of data
        return TableWrapper(all_temperature_data, default_columns = ['Temperature'])
Project: scikit-dataaccess | Author: MITHaystack
def output(self):
        ''' 
        Generate data wrapper for USGS geomagnetic data

        @return geomagnetic data wrapper
        '''

        observatory_list = self.ap_paramList[0]()

        # USGS Edge server
        base_url = 'cwbpub.cr.usgs.gov'     
        factory = EdgeFactory(host=base_url, port=2060)

        data_dict = OrderedDict()
        for observatory in observatory_list:
            ret_data = factory.get_timeseries( observatory=observatory,
                                               interval=self.interval,
                                               type=self.data_type,
                                               channels=self.channels,
                                               starttime=UTCDateTime(self.start_time),
                                               endtime=UTCDateTime(self.end_time))

            obs_data = OrderedDict()
            for label, trace in zip(self.channels, ret_data):
                time = pd.to_datetime(trace.stats['starttime'].datetime) + pd.to_timedelta(trace.times(),unit='s')
                obs_data[label] = pd.Series(trace.data,time)


            data_dict[observatory] = pd.DataFrame(obs_data)


        return TableWrapper(data_dict, default_columns=self.channels)
Project: scikit-dataaccess | Author: MITHaystack
def dateMismatch(dates, days=10):
    '''
    Check if dates are not within a certain number of days of each other

    @param dates: Iterable container of pandas timestamps
    @param days: Number of days

    @return True if any two dates are more than the given number of days apart, False otherwise
    '''
    for combo in combinations(dates,2):
        if np.abs(combo[0] - combo[1]) > pd.to_timedelta(days, 'D'):
            return True
    return False
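
A self-contained sketch of the same pairwise check (the example dates are hypothetical):

import pandas as pd
from itertools import combinations

dates = pd.to_datetime(['2020-01-01', '2020-01-05', '2020-02-01'])
mismatch = any(abs(a - b) > pd.to_timedelta(10, 'D')
               for a, b in combinations(dates, 2))
print(mismatch)  # True: the February date is more than 10 days from both January dates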
Project: DGP | Author: DynamicGravitySystems
def convert_gps_time(gpsweek, gpsweekseconds, format='unix'):
    """
    convert_gps_time(gpsweek, gpsweekseconds, format='unix') -> float or datetime

    Converts a GPS time format (weeks + seconds since 6 Jan 1980) to a UNIX timestamp
    (seconds since 1 Jan 1970) without correcting for UTC leap seconds.

    Static values gps_delta and gpsweek_cf are defined by the below functions (optimization)
    gps_delta is the time difference (in seconds) between UNIX time and GPS time.
    gps_delta = (dt.datetime(1980, 1, 6) - dt.datetime(1970, 1, 1)).total_seconds()

    gpsweek_cf is the coefficient to convert weeks to seconds
    gpsweek_cf = 7 * 24 * 60 * 60  # 604800

    :param gpsweek: Number of weeks since the beginning of GPS time (1980-01-06 00:00:00)
    :param gpsweekseconds: Number of seconds since the start of the given GPS week
    :param format: 'unix' returns a float timestamp; 'datetime' returns a datetime.datetime
    :return: (float) UNIX timestamp (seconds since 1970-01-01 00:00:00), or a datetime
    """
    # GPS time begins 1980 Jan 6 00:00, UNIX time begins 1970 Jan 1 00:00
    gps_delta = 315964800.0
    gpsweek_cf = 604800

    if isinstance(gpsweek, pd.Series) and isinstance(gpsweekseconds, pd.Series):
        gps_ticks = (gpsweek.astype('float64') * gpsweek_cf) + gpsweekseconds.astype('float64')
    else:
        gps_ticks = (float(gpsweek) * gpsweek_cf) + float(gpsweekseconds)

    timestamp = gps_delta + gps_ticks

    if format == 'unix':
        return timestamp
    elif format == 'datetime':
        return datetime.datetime(1970, 1, 1) + pd.to_timedelta(timestamp, unit='s')
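
A quick sanity check of the conversion, reusing the constants defined above (the week and second values are hypothetical; no leap-second correction is applied):

import datetime
import pandas as pd

gps_delta = 315964800.0  # seconds between the UNIX and GPS epochs
gpsweek_cf = 604800      # seconds per week

# GPS week 2000, 100000 seconds into the week
timestamp = gps_delta + 2000 * gpsweek_cf + 100000.0
print(datetime.datetime(1970, 1, 1) + pd.to_timedelta(timestamp, unit='s'))
# 2018-05-07 03:46:40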
Project: DGP | Author: DynamicGravitySystems
def datenum_to_datetime(timestamp):
    if isinstance(timestamp, pd.Series):
        return (timestamp.astype(int).map(datetime.datetime.fromordinal) +
                pd.to_timedelta(timestamp % 1, unit='D') -
                pd.to_timedelta('366 days'))
    else:
        return (datetime.datetime.fromordinal(int(timestamp) - 366) +
                datetime.timedelta(days=timestamp % 1))
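
MATLAB datenums count days from the year 0, hence the 366-day offset above; a quick check of the scalar path (the value is chosen so the result is easy to verify):

import datetime

# MATLAB datenum 730486.5 is 2000-01-01 12:00:00
# (the Python ordinal of 2000-01-01 is 730120 = 730486 - 366)
ts = 730486.5
print(datetime.datetime.fromordinal(int(ts) - 366) +
      datetime.timedelta(days=ts % 1))
# 2000-01-01 12:00:00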
Project: heliopy | Author: heliopython
def _convert_ulysses_time(data):
    """Method to convert timestamps to datetimes"""
    data.loc[data['year'] > 50, 'year'] += 1900
    data.loc[data['year'] < 50, 'year'] += 2000

    data['Time'] = pd.to_datetime(data['year'].astype(str) + ':' +
                                  data['doy'].astype(str),
                                  format='%Y:%j')
    data['Time'] += (pd.to_timedelta(data['hour'], unit='h') +
                     pd.to_timedelta(data['minute'], unit='m') +
                     pd.to_timedelta(data['second'], unit='s'))
    data = data.drop(['year', 'doy', 'hour', 'minute', 'second'],
                     axis=1)
    return data
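
A self-contained sketch of the same year/day-of-year plus time-of-day combination (the sample row is hypothetical):

import pandas as pd

df = pd.DataFrame({'year': [1995], 'doy': [32],
                   'hour': [6], 'minute': [30], 'second': [15]})
t = pd.to_datetime(df['year'].astype(str) + ':' + df['doy'].astype(str),
                   format='%Y:%j')
t += (pd.to_timedelta(df['hour'], unit='h') +
      pd.to_timedelta(df['minute'], unit='m') +
      pd.to_timedelta(df['second'], unit='s'))
print(t[0])  # 1995-02-01 06:30:15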
Project: dfViewer | Author: sterry24
def convertColumnsToTimeDeltas(self,section):
        col=self._df.columns[section]
        self._df[col]=pd.to_timedelta(self._df[col])
        self._dirty = True  
        self.dataChanged.emit()

###############################################################################
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_resample_with_timedeltas(self):

        expected = DataFrame({'A': np.arange(1480)})
        expected = expected.groupby(expected.index // 30).sum()
        expected.index = pd.timedelta_range('0 days', freq='30T', periods=50)

        df = DataFrame({'A': np.arange(1480)}, index=pd.to_timedelta(
            np.arange(1480), unit='T'))
        result = df.resample('30T').sum()

        assert_frame_equal(result, expected)

        s = df['A']
        result = s.resample('30T').sum()
        assert_series_equal(result, expected['A'])
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_ops_ndarray(self):
        td = Timedelta('1 day')

        # timedelta, timedelta
        other = pd.to_timedelta(['1 day']).values
        expected = pd.to_timedelta(['2 days']).values
        self.assert_numpy_array_equal(td + other, expected)
        if LooseVersion(np.__version__) >= '1.8':
            self.assert_numpy_array_equal(other + td, expected)
        self.assertRaises(TypeError, lambda: td + np.array([1]))
        self.assertRaises(TypeError, lambda: np.array([1]) + td)

        expected = pd.to_timedelta(['0 days']).values
        self.assert_numpy_array_equal(td - other, expected)
        if LooseVersion(np.__version__) >= '1.8':
            self.assert_numpy_array_equal(-other + td, expected)
        self.assertRaises(TypeError, lambda: td - np.array([1]))
        self.assertRaises(TypeError, lambda: np.array([1]) - td)

        expected = pd.to_timedelta(['2 days']).values
        self.assert_numpy_array_equal(td * np.array([2]), expected)
        self.assert_numpy_array_equal(np.array([2]) * td, expected)
        self.assertRaises(TypeError, lambda: td * other)
        self.assertRaises(TypeError, lambda: other * td)

        self.assert_numpy_array_equal(td / other, np.array([1]))
        if LooseVersion(np.__version__) >= '1.8':
            self.assert_numpy_array_equal(other / td, np.array([1]))

        # timedelta, datetime
        other = pd.to_datetime(['2000-01-01']).values
        expected = pd.to_datetime(['2000-01-02']).values
        self.assert_numpy_array_equal(td + other, expected)
        if LooseVersion(np.__version__) >= '1.8':
            self.assert_numpy_array_equal(other + td, expected)

        expected = pd.to_datetime(['1999-12-31']).values
        self.assert_numpy_array_equal(-td + other, expected)
        if LooseVersion(np.__version__) >= '1.8':
            self.assert_numpy_array_equal(other - td, expected)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_ops_series(self):
        # regression test for GH8813
        td = Timedelta('1 day')
        other = pd.Series([1, 2])
        expected = pd.Series(pd.to_timedelta(['1 day', '2 days']))
        tm.assert_series_equal(expected, td * other)
        tm.assert_series_equal(expected, other * td)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_timedelta_range(self):

        expected = to_timedelta(np.arange(5), unit='D')
        result = timedelta_range('0 days', periods=5, freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(11), unit='D')
        result = timedelta_range('0 days', '10 days', freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(5), unit='D') + Second(2) + Day()
        result = timedelta_range('1 days, 00:00:02', '5 days, 00:00:02',
                                 freq='D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta([1, 3, 5, 7, 9], unit='D') + Second(2)
        result = timedelta_range('1 days, 00:00:02', periods=5, freq='2D')
        tm.assert_index_equal(result, expected)

        expected = to_timedelta(np.arange(50), unit='T') * 30
        result = timedelta_range('0 days', freq='30T', periods=50)
        tm.assert_index_equal(result, expected)

        # GH 11776
        arr = np.arange(10).reshape(2, 5)
        df = pd.DataFrame(np.arange(10).reshape(2, 5))
        for arg in (arr, df):
            with tm.assertRaisesRegexp(TypeError, "1-d array"):
                to_timedelta(arg)
            for errors in ['ignore', 'raise', 'coerce']:
                with tm.assertRaisesRegexp(TypeError, "1-d array"):
                    to_timedelta(arg, errors=errors)

        # issue10583
        df = pd.DataFrame(np.random.normal(size=(10, 4)))
        df.index = pd.timedelta_range(start='0s', periods=10, freq='s')
        expected = df.loc[pd.Timedelta('0s'):, :]
        result = df.loc['0s':, :]
        assert_frame_equal(expected, result)
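
The last assertion relies on partial-string indexing against a TimedeltaIndex; a minimal sketch of that behavior:

import numpy as np
import pandas as pd

df = pd.DataFrame({'A': np.arange(10)},
                  index=pd.timedelta_range(start='0s', periods=10, freq='s'))
# String labels are parsed as timedeltas when slicing
print(df.loc['5s':])  # rows from 5 seconds onward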
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_nat_converters(self):
        self.assertEqual(to_timedelta(
            'nat', box=False).astype('int64'), tslib.iNaT)
        self.assertEqual(to_timedelta(
            'nan', box=False).astype('int64'), tslib.iNaT)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_timedelta_ops_scalar(self):
        # GH 6808
        base = pd.to_datetime('20130101 09:01:12.123456')
        expected_add = pd.to_datetime('20130101 09:01:22.123456')
        expected_sub = pd.to_datetime('20130101 09:01:02.123456')

        for offset in [pd.to_timedelta(10, unit='s'), timedelta(seconds=10),
                       np.timedelta64(10, 's'),
                       np.timedelta64(10000000000, 'ns'),
                       pd.offsets.Second(10)]:
            result = base + offset
            self.assertEqual(result, expected_add)

            result = base - offset
            self.assertEqual(result, expected_sub)

        base = pd.to_datetime('20130102 09:01:12.123456')
        expected_add = pd.to_datetime('20130103 09:01:22.123456')
        expected_sub = pd.to_datetime('20130101 09:01:02.123456')

        for offset in [pd.to_timedelta('1 day, 00:00:10'),
                       pd.to_timedelta('1 days, 00:00:10'),
                       timedelta(days=1, seconds=10),
                       np.timedelta64(1, 'D') + np.timedelta64(10, 's'),
                       pd.offsets.Day() + pd.offsets.Second(10)]:
            result = base + offset
            self.assertEqual(result, expected_add)

            result = base - offset
            self.assertEqual(result, expected_sub)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_constructor_coverage(self):
        rng = timedelta_range('1 days', periods=10.5)
        exp = timedelta_range('1 days', periods=10)
        self.assertTrue(rng.equals(exp))

        self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
                          periods='foo', freq='D')

        self.assertRaises(ValueError, TimedeltaIndex, start='1 days',
                          end='10 days')

        self.assertRaises(ValueError, TimedeltaIndex, '1 days')

        # generator expression
        gen = (timedelta(i) for i in range(10))
        result = TimedeltaIndex(gen)
        expected = TimedeltaIndex([timedelta(i) for i in range(10)])
        self.assertTrue(result.equals(expected))

        # NumPy string array
        strings = np.array(['1 days', '2 days', '3 days'])
        result = TimedeltaIndex(strings)
        expected = to_timedelta([1, 2, 3], unit='d')
        self.assertTrue(result.equals(expected))

        from_ints = TimedeltaIndex(expected.asi8)
        self.assertTrue(from_ints.equals(expected))

        # non-conforming freq
        self.assertRaises(ValueError, TimedeltaIndex,
                          ['1 days', '2 days', '4 days'], freq='D')

        self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D')
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_conversion_preserves_name(self):
        # GH 10875
        i = pd.Index(['01:02:03', '01:02:04'], name='label')
        self.assertEqual(i.name, pd.to_datetime(i).name)
        self.assertEqual(i.name, pd.to_timedelta(i).name)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def create_index(self):
        return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_get_indexer(self):
        idx = pd.to_timedelta(['0 days', '1 days', '2 days'])
        tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2])

        target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour'])
        tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, 'backfill'), [0, 1, 2])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, 'nearest'), [0, 1, 1])
        tm.assert_numpy_array_equal(
            idx.get_indexer(target, 'nearest',
                            tolerance=pd.Timedelta('1 hour')),
            [0, -1, 1])
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_quantile(self):
        from numpy import percentile

        q = self.ts.quantile(0.1)
        self.assertEqual(q, percentile(self.ts.valid(), 10))

        q = self.ts.quantile(0.9)
        self.assertEqual(q, percentile(self.ts.valid(), 90))

        # object dtype
        q = Series(self.ts, dtype=object).quantile(0.9)
        self.assertEqual(q, percentile(self.ts.valid(), 90))

        # datetime64[ns] dtype
        dts = self.ts.index.to_series()
        q = dts.quantile(.2)
        self.assertEqual(q, Timestamp('2000-01-10 19:12:00'))

        # timedelta64[ns] dtype
        tds = dts.diff()
        q = tds.quantile(.25)
        self.assertEqual(q, pd.to_timedelta('24:00:00'))

        # GH7661
        result = Series([np.timedelta64('NaT')]).sum()
        self.assertTrue(result is pd.NaT)

        msg = 'percentiles should all be in the interval \\[0, 1\\]'
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with tm.assertRaisesRegexp(ValueError, msg):
                self.ts.quantile(invalid)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_isin_with_i8(self):
        # GH 5021

        expected = Series([True, True, False, False, False])
        expected2 = Series([False, True, False, False, False])

        # datetime64[ns]
        s = Series(date_range('jan-01-2013', 'jan-05-2013'))

        result = s.isin(s[0:2])
        assert_series_equal(result, expected)

        result = s.isin(s[0:2].values)
        assert_series_equal(result, expected)

        # fails on dtype conversion in the first place
        result = s.isin(s[0:2].values.astype('datetime64[D]'))
        assert_series_equal(result, expected)

        result = s.isin([s[1]])
        assert_series_equal(result, expected2)

        result = s.isin([np.datetime64(s[1])])
        assert_series_equal(result, expected2)

        # timedelta64[ns]
        s = Series(pd.to_timedelta(lrange(5), unit='d'))
        result = s.isin(s[0:2])
        assert_series_equal(result, expected)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_timedelta64_operations_with_timedeltas(self):

        # td operate with td
        td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
        td2 = timedelta(minutes=5, seconds=4)
        result = td1 - td2
        expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta(
            seconds=1)] * 3)
        self.assertEqual(result.dtype, 'm8[ns]')
        assert_series_equal(result, expected)

        result2 = td2 - td1
        expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta(
            seconds=0)] * 3))
        assert_series_equal(result2, expected)

        # roundtrip
        assert_series_equal(result + td2, td1)

        # Now again, using pd.to_timedelta, which should build
        # a Series or a scalar, depending on input.
        td1 = Series(pd.to_timedelta(['00:05:03'] * 3))
        td2 = pd.to_timedelta('00:05:04')
        result = td1 - td2
        expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta(
            seconds=1)] * 3)
        self.assertEqual(result.dtype, 'm8[ns]')
        assert_series_equal(result, expected)

        result2 = td2 - td1
        expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta(
            seconds=0)] * 3))
        assert_series_equal(result2, expected)

        # roundtrip
        assert_series_equal(result + td2, td1)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_even_day(self):
        delta_1d = pd.to_timedelta(1, unit='D')
        delta_0d = pd.to_timedelta(0, unit='D')
        delta_1s = pd.to_timedelta(1, unit='s')
        delta_500ms = pd.to_timedelta(500, unit='ms')

        drepr = lambda x: x._repr_base(format='even_day')
        self.assertEqual(drepr(delta_1d), "1 days")
        self.assertEqual(drepr(-delta_1d), "-1 days")
        self.assertEqual(drepr(delta_0d), "0 days")
        self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
        self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
        self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
        self.assertEqual(
            drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_sub_day(self):
        delta_1d = pd.to_timedelta(1, unit='D')
        delta_0d = pd.to_timedelta(0, unit='D')
        delta_1s = pd.to_timedelta(1, unit='s')
        delta_500ms = pd.to_timedelta(500, unit='ms')

        drepr = lambda x: x._repr_base(format='sub_day')
        self.assertEqual(drepr(delta_1d), "1 days")
        self.assertEqual(drepr(-delta_1d), "-1 days")
        self.assertEqual(drepr(delta_0d), "00:00:00")
        self.assertEqual(drepr(delta_1s), "00:00:01")
        self.assertEqual(drepr(delta_500ms), "00:00:00.500000")
        self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
        self.assertEqual(
            drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_long(self):
        delta_1d = pd.to_timedelta(1, unit='D')
        delta_0d = pd.to_timedelta(0, unit='D')
        delta_1s = pd.to_timedelta(1, unit='s')
        delta_500ms = pd.to_timedelta(500, unit='ms')

        drepr = lambda x: x._repr_base(format='long')
        self.assertEqual(drepr(delta_1d), "1 days 00:00:00")
        self.assertEqual(drepr(-delta_1d), "-1 days +00:00:00")
        self.assertEqual(drepr(delta_0d), "0 days 00:00:00")
        self.assertEqual(drepr(delta_1s), "0 days 00:00:01")
        self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000")
        self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01")
        self.assertEqual(
            drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_all(self):
        delta_1d = pd.to_timedelta(1, unit='D')
        delta_0d = pd.to_timedelta(0, unit='D')
        delta_1ns = pd.to_timedelta(1, unit='ns')

        drepr = lambda x: x._repr_base(format='all')
        self.assertEqual(drepr(delta_1d), "1 days 00:00:00.000000000")
        self.assertEqual(drepr(delta_0d), "0 days 00:00:00.000000000")
        self.assertEqual(drepr(delta_1ns), "0 days 00:00:00.000000001")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_days(self):
        x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D')
        result = fmt.Timedelta64Formatter(x, box=True).get_result()
        self.assertEqual(result[0].strip(), "'0 days'")
        self.assertEqual(result[1].strip(), "'1 days'")

        result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result()
        self.assertEqual(result[0].strip(), "'1 days'")

        result = fmt.Timedelta64Formatter(x, box=False).get_result()
        self.assertEqual(result[0].strip(), "0 days")
        self.assertEqual(result[1].strip(), "1 days")

        result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result()
        self.assertEqual(result[0].strip(), "1 days")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_subdays(self):
        y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
        result = fmt.Timedelta64Formatter(y, box=True).get_result()
        self.assertEqual(result[0].strip(), "'00:00:00'")
        self.assertEqual(result[1].strip(), "'00:00:01'")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_subdays_neg(self):
        y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s')
        result = fmt.Timedelta64Formatter(-y, box=True).get_result()
        self.assertEqual(result[0].strip(), "'00:00:00'")
        self.assertEqual(result[1].strip(), "'-1 days +23:59:59'")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_zero(self):
        x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit='D')
        result = fmt.Timedelta64Formatter(x, box=True).get_result()
        self.assertEqual(result[0].strip(), "'0 days'")

        x = pd.to_timedelta(list(range(1)), unit='D')
        result = fmt.Timedelta64Formatter(x, box=True).get_result()
        self.assertEqual(result[0].strip(), "'0 days'")
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_describe_timedelta(self):
        df = DataFrame({"td": pd.to_timedelta(np.arange(24) % 20, "D")})
        self.assertTrue(df.describe().loc["mean"][0] == pd.to_timedelta(
            "8d4h"))
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def _chunk_to_dataframe(self):

        n = self._current_row_in_chunk_index
        m = self._current_row_in_file_index
        ix = range(m - n, m)
        rslt = pd.DataFrame(index=ix)

        js, jb = 0, 0
        for j in range(self.column_count):

            name = self.column_names[j]

            if self.column_types[j] == b'd':
                rslt[name] = self._byte_chunk[jb, :].view(
                    dtype=self.byte_order + 'd')
                rslt[name] = np.asarray(rslt[name], dtype=np.float64)
                if self.convert_dates and (self.column_formats[j] == "MMDDYY"):
                    epoch = pd.datetime(1960, 1, 1)
                    rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d')
                jb += 1
            elif self.column_types[j] == b's':
                rslt[name] = self._string_chunk[js, :]
                rslt[name] = rslt[name].apply(lambda x: x.rstrip(b'\x00 '))
                if self.encoding is not None:
                    rslt[name] = rslt[name].apply(
                        lambda x: x.decode(encoding=self.encoding))
                if self.blank_missing:
                    ii = rslt[name].str.len() == 0
                    rslt.loc[ii, name] = np.nan
                js += 1
            else:
                raise ValueError("unknown column type %s" %
                                 self.column_types[j])

        return rslt
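
The MMDDYY branch above relies on SAS storing dates as day counts from the 1960-01-01 epoch; a small standalone sketch of that conversion (the day counts are hypothetical):

import pandas as pd

sas_days = pd.Series([0.0, 366.0, 21915.0])
epoch = pd.Timestamp('1960-01-01')
print(epoch + pd.to_timedelta(sas_days, unit='D'))
# 1960-01-01, 1961-01-01, 2020-01-01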
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_timedelta(self):

        # see #6921
        df = to_timedelta(
            Series(['00:00:01', '00:00:03'], name='foo')).to_frame()
        with tm.assert_produces_warning(UserWarning):
            df.to_sql('test_timedelta', self.conn)
        result = sql.read_sql_query('SELECT * FROM test_timedelta', self.conn)
        tm.assert_series_equal(result['foo'], df['foo'].astype('int64'))
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_timedelta(self):
        converter = lambda x: pd.to_timedelta(x, unit='ms')

        s = Series([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        # index will be float dtype
        assert_series_equal(s, pd.read_json(s.to_json(), typ='series')
                            .apply(converter),
                            check_index_type=False)

        s = Series([timedelta(23), timedelta(seconds=5)],
                   index=pd.Index([0, 1], dtype=float))
        self.assertEqual(s.dtype, 'timedelta64[ns]')
        assert_series_equal(s, pd.read_json(
            s.to_json(), typ='series').apply(converter))

        frame = DataFrame([timedelta(23), timedelta(seconds=5)])
        self.assertEqual(frame[0].dtype, 'timedelta64[ns]')
        assert_frame_equal(frame, pd.read_json(frame.to_json())
                           .apply(converter),
                           check_index_type=False,
                           check_column_type=False)

        frame = DataFrame({'a': [timedelta(days=23), timedelta(seconds=5)],
                           'b': [1, 2],
                           'c': pd.date_range(start='20130101', periods=2)})

        result = pd.read_json(frame.to_json(date_unit='ns'))
        result['a'] = pd.to_timedelta(result.a, unit='ns')
        result['c'] = pd.to_datetime(result.c)
        assert_frame_equal(frame, result, check_index_type=False)
Project: PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda | Author: SignalMedia
def test_concat_timedelta64_block(self):
        from pandas import to_timedelta

        rng = to_timedelta(np.arange(10), unit='s')

        df = DataFrame({'time': rng})

        result = concat([df, df])
        self.assertTrue((result.iloc[:10]['time'] == rng).all())
        self.assertTrue((result.iloc[10:]['time'] == rng).all())
Project: treadmill | Author: Morgan-Stanley
def init():
    """Return top level command handler."""

    @click.command()
    @cli.handle_exceptions(restclient.CLI_REST_EXCEPTIONS)
    @click.option('--match', help='Server name pattern match')
    @click.option('--full', is_flag=True, default=False)
    @click.pass_context
    def apps(ctx, match, full):
        """View apps report."""
        report = fetch_report(ctx.obj.get('api'), 'apps', match)
        # Replace integer N/As
        for col in ['identity', 'expires', 'lease', 'data_retention']:
            report.loc[report[col] == -1, col] = ''
        # Convert to datetimes
        for col in ['expires']:
            report[col] = pd.to_datetime(report[col], unit='s')
        # Convert to timedeltas
        for col in ['lease', 'data_retention']:
            report[col] = pd.to_timedelta(report[col], unit='s')
        report = report.fillna('')

        if not full:
            report = report[[
                'instance', 'allocation', 'partition', 'server',
                'mem', 'cpu', 'disk'
            ]]

        print_report(report)

    return apps
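
The loop above turns integer-second columns into human-readable durations; a minimal sketch with a fabricated report (column values are hypothetical):

import pandas as pd

report = pd.DataFrame({'lease': [3600, 86400],
                       'data_retention': [0, 172800]})
for col in ['lease', 'data_retention']:
    report[col] = pd.to_timedelta(report[col], unit='s')
print(report.dtypes)  # both columns become timedelta64[ns]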
Project: python_data_tools | Author: king3366ster
def format_date_to_datetime(self, df, t_date = None):
        if t_date is None:
            t_date = dataTime.datetimeRelative(delta = 0)
        t_date = t_date.replace(' 00:00:00', '')
        df_new = df.copy()
        df_new.insert(0, 'datetime', t_date)
        df_new['datetime'] = pd.to_datetime(df_new['datetime'])
        df_new['time'] = pd.to_timedelta(df_new['time'])
        df_new['datetime'] = df_new['datetime'] + df_new['time']
        df_new = df_new.sort_values(['datetime'], ascending=[True])
        del df_new['time']

        return df_new
# Historical quote data
    # Parameters:
        # code: security code, either a 6-digit stock code or an index code (sh = SSE Composite, sz = SZSE Component, hs300 = CSI 300, sz50 = SSE 50, zxb = SME board, cyb = ChiNext)
        # start: start date, format YYYY-MM-DD
        # end: end date, format YYYY-MM-DD
        # ktype: bar type, D = daily, W = weekly, M = monthly, 5/15/30/60 = minute bars; defaults to D
        # retry_count: number of retries after a network error, defaults to 3
        # pause: seconds to pause between retries, defaults to 0

    # Returned columns:
        # date: date
        # open: opening price
        # high: high price
        # close: closing price
        # low: low price
        # volume: trading volume
        # price_change: price change
        # p_change: percent change
        # ma5: 5-day moving average price
        # ma10: 10-day moving average price
        # ma20: 20-day moving average price
        # v_ma5: 5-day moving average volume
        # v_ma10: 10-day moving average volume
        # v_ma20: 20-day moving average volume
        # turnover: turnover rate [not available for index data]
Project: scikit-discovery | Author: MITHaystack
def process(self, obj_data):
        '''
        Apply the MIDAS estimator to generate velocity estimates

        Adds the result to the data wrapper

        @param obj_data: Data wrapper
        '''

        if self.column_names is None:
            column_names = obj_data.getDefaultColumns()
        else:
            column_names = self.column_names

        time_diff = pd.to_timedelta('365d')
        results = dict()
        for label, data in obj_data.getIterator():
            start_date = data.index[0]
            end_date = data.index[-1]
            for column in column_names:
                start_data = data.loc[start_date:(end_date-time_diff), column]
                end_data = data.loc[start_date+time_diff:end_date, column]

                offsets = end_data.values - start_data.values
                offsets = offsets[~np.isnan(offsets)]
                med_off = np.median(offsets)
                mad_off = mad(offsets)

                cut_offsets = offsets[np.logical_and(offsets < med_off + 2*mad_off, 
                                                     offsets > med_off - 2*mad_off)]
                final_vel = np.median(cut_offsets)
                final_unc = np.sqrt(np.pi/2) * mad(cut_offsets) / np.sqrt(len(cut_offsets))

                results[label] = pd.DataFrame([final_vel, final_unc], ['velocity', 'uncertainty'], [column])

        obj_data.addResult(self.str_description, pd.Panel.fromDict(results,orient='minor'))
Project: qutils | Author: Raychee
def to_seconds(timedelta_str):
    return to_timedelta(timedelta_str).total_seconds()
Project: qutils | Author: Raychee
def to_timedelta(timedelta_repr):
    return pd.to_timedelta(str(timedelta_repr), unit='s')
Project: qutils | Author: Raychee
def test_timedelta_to_human(self):
        for td in timedelta(days=1, seconds=3900), pd.to_timedelta('1d1h5m'):
            self.assertEqual('1.05 days', timedelta_to_human(td, precision=2))
            self.assertEqual('1.0 day', timedelta_to_human(td, precision=1))
        for td in timedelta(days=-1, seconds=-3900), pd.to_timedelta('-1d1h5m'):
            self.assertEqual('1.05 days ago', timedelta_to_human(td, precision=2))
            self.assertEqual('1.0 day ago', timedelta_to_human(td, precision=1))
Project: JDcontest | Author: zsyandjyhouse
def get_accumulate_action_feat(start_time, end_time, action_data):
    actions = action_data[(action_data['time'] >= start_time) & (action_data['time'] <= end_time)]
    action_data['time'] = pd.to_datetime(action_data['time'], format='%Y-%m-%d %H:%M:%S')
    df = pd.get_dummies(actions['type'], prefix='action')
    actions = pd.concat([actions, df], axis=1) # type: pd.DataFrame
    # weight each action by recency (exponential time decay)
    actions['weights'] = actions['time'].map(lambda x: pd.to_timedelta(end_time-x))
    #actions['weights'] = time.strptime(end_date, '%Y-%m-%d') - actions['datetime']
    actions['weights'] = actions['weights'].map(lambda x: math.exp(-x.days))
    print(actions.head(10))
    actions['action_1'] = actions['action_1'] * actions['weights']
    actions['action_2'] = actions['action_2'] * actions['weights']
    actions['action_3'] = actions['action_3'] * actions['weights']
    actions['action_4'] = actions['action_4'] * actions['weights']
    actions['action_5'] = actions['action_5'] * actions['weights']
    actions['action_6'] = actions['action_6'] * actions['weights']
    del actions['model_id']
    del actions['time']
    del actions['weights']
    del actions['cate']
    del actions['brand']
    actions = actions.groupby(['user_id', 'sku_id'], as_index=False).sum()
    actions.fillna(0,inplace=True)

    actions['action_1256']=actions['action_1']+actions['action_2']+actions['action_5']+actions['action_6']
    actions['action_1256_d_4']=actions['action_4']/actions['action_1256']
    del actions['type']
    return actions