Python numpy 模块,object_() 实例源码


def categorize_columns(self, df):
    """Categorize columns of dataframe by data type

    Every column named in ``self.columns`` is looked up in ``df``; its data
    type is stored in ``self.var_dtype`` (unless a type was already assigned
    there) and the column is filed under ``self.num_cols``, ``self.dt_cols``
    or ``self.str_cols``.

    :param df: input (pandas) data frame
    :raises KeyError: if a requested column is not present in ``df``
    :raises TypeError: if the data type of a column is found in none of
        STRING_SUBSTR, NUMERIC_SUBSTR and TIME_SUBSTR
    """
    # check presence and data type of requested columns
    # sort columns into numerical, timestamp and category based
    for c in self.columns:
        for col in c:
            if col not in df.columns:
                raise KeyError('column "{0:s}" not in dataframe "{1:s}"'.format(col, self.read_key))
            dt = self.get_data_type(df, col)
            if col not in self.var_dtype:
                self.var_dtype[col] = dt.type
                # byte-string and object columns are handled as plain Python strings
                # (np.bytes_ is the spelling of np.string_ that still exists in NumPy 2)
                if (self.var_dtype[col] is np.bytes_) or (self.var_dtype[col] is np.object_):
                    self.var_dtype[col] = str
            if not any(dt in types for types in (STRING_SUBSTR, NUMERIC_SUBSTR, TIME_SUBSTR)):
                raise TypeError('cannot process column "{0:s}" of data type "{1:s}"'.format(col, str(dt)))
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)
            colset = self.num_cols if is_number else self.dt_cols if is_timestamp else self.str_cols
            if col not in colset:
                # NOTE(review): the statement under this guard was missing from
                # the source; append() assumes the column collections are
                # lists -- confirm against the class definition.
                colset.append(col)
            self.log().debug('Data type of column "%s" is "%s"', col, self.var_dtype[col])
def test_object_array_refcount_self_assign(self, level=rlevel):
    # Ticket #711
    # Assigning an object array onto itself must not release the only
    # remaining references to the object stored in it.
    class VictimObject(object):
        deleted = False

        def __del__(self):
            self.deleted = True

    victim = VictimObject()
    holder = np.zeros(5, dtype=np.object_)
    holder[:] = victim
    # from here on the array slots hold the only references to the victim
    del victim
    # first pass: refcount of the victim might hit zero here
    # second pass: trying to induce a segfault by doing it again...
    for _ in range(2):
        holder[:] = holder
        assert_(not holder[0].deleted)
def batch_loader(self, rnd_gen=np.random, shuffle=True):
    """load_mbs yields a new minibatch at each iteration

    :param rnd_gen: object providing ``shuffle(array)``; used to permute the
        sample order in place when ``shuffle`` is true
    :param shuffle: if true, samples are visited in random order
    :return: generator of dicts with keys ``X``, ``y`` and ``ID``

    The yielded arrays are buffers that are allocated once and overwritten on
    every iteration, so copy them if a minibatch has to outlive the next
    iteration. When the last minibatch is not full, only its first
    ``end - start`` rows are rewritten; the remaining rows still hold the
    samples of the previous minibatch.
    """
    batchsize = self.batchsize
    inds = np.arange(self.n_samples)
    if shuffle:
        rnd_gen.shuffle(inds)
    # round up so that a final, partially filled minibatch is included
    n_mbs = int(np.ceil(self.n_samples / batchsize))

    x = np.zeros(self.X_shape, np.float32)
    y = np.zeros(self.y_shape, np.float32)
    ids = np.empty((batchsize,), np.object_)

    for m in range(n_mbs):
        start = m * batchsize
        end = min((m + 1) * batchsize, self.n_samples)
        mb_slice = slice(start, end)

        x[:end - start, :] = self.x[inds[mb_slice], :]
        y[:end - start, :] = self.y[inds[mb_slice], :]
        ids[:end - start] = self.ids[inds[mb_slice]]

        yield dict(X=x, y=y, ID=ids)
def pad_1d(values, limit=None, mask=None, dtype=None):
    """Run the dtype-specific ``pad`` fill routine on ``values``.

    :param values: array to fill; integer input is converted with
        ``com._ensure_float64`` before filling
    :param limit: passed through to the fill routine
    :param mask: marker array for the entries to fill; computed with
        ``com.isnull(values)`` when omitted
    :param dtype: dtype used to select the routine, defaults to
        ``values.dtype``
    :return: the (possibly converted) values after the routine has run
    :raises ValueError: if no fill routine exists for the dtype
    """
    if dtype is None:
        dtype = values.dtype
    _method = None
    if com.is_float_dtype(values):
        # routines are looked up by dtype name, e.g. pad_inplace_float64
        _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        _method = _pad_1d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        _method = algos.pad_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.pad_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    # the fill routines receive the mask reinterpreted as uint8
    mask = mask.view(np.uint8)
    _method(values, mask, limit=limit)
    return values
def backfill_1d(values, limit=None, mask=None, dtype=None):
    """Run the dtype-specific ``backfill`` fill routine on ``values``.

    :param values: array to fill; integer input is converted with
        ``com._ensure_float64`` before filling
    :param limit: passed through to the fill routine
    :param mask: marker array for the entries to fill; computed with
        ``com.isnull(values)`` when omitted
    :param dtype: dtype used to select the routine, defaults to
        ``values.dtype``
    :return: the (possibly converted) values after the routine has run
    :raises ValueError: if no fill routine exists for the dtype
    """
    if dtype is None:
        dtype = values.dtype
    _method = None
    if com.is_float_dtype(values):
        # routines are looked up by dtype name, e.g. backfill_inplace_float64
        _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        _method = _backfill_1d_datetime
    elif com.is_integer_dtype(values):
        values = com._ensure_float64(values)
        _method = algos.backfill_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    # the fill routines receive the mask reinterpreted as uint8
    mask = mask.view(np.uint8)

    _method(values, mask, limit=limit)
    return values
def is_bool_indexer(key):
    """Tell whether ``key`` is a boolean mask.

    :param key: candidate indexer
    :return: True for a Series/ndarray of bool dtype, for an object-dtype
        Series/ndarray that holds only booleans, and for a list that converts
        to a bool array of the same length; False otherwise
    :raises ValueError: if an object-dtype key is not all-boolean and
        contains NA / NaN values
    """
    if isinstance(key, (ABCSeries, np.ndarray)):
        if key.dtype == np.object_:
            key = np.asarray(_values_from_object(key))

            if not lib.is_bool_array(key):
                if isnull(key).any():
                    raise ValueError('cannot index with vector containing '
                                     'NA / NaN values')
                return False
            return True
        elif key.dtype == np.bool_:
            return True
    elif isinstance(key, list):
        try:
            arr = np.asarray(key)
            # the length check rejects lists that do not convert to a
            # one-element-per-item bool array
            return arr.dtype == np.bool_ and len(arr) == len(key)
        except TypeError:  # pragma: no cover
            return False

    return False
def test_fromValue(self):
    # Series built from a single scalar: the value is repeated for every
    # index label and the dtype follows the scalar.

    # np.nan / np.float64 are the spellings that survive NumPy 2.0
    nans = Series(np.nan, index=self.ts.index)
    self.assertEqual(nans.dtype, np.float64)
    self.assertEqual(len(nans), len(self.ts))

    strings = Series('foo', index=self.ts.index)
    self.assertEqual(strings.dtype, np.object_)
    self.assertEqual(len(strings), len(self.ts))

    # NOTE(review): the right-hand side was missing from the source; any
    # datetime value satisfies the 'M8[ns]' assertion below -- confirm.
    d = datetime.now()
    dates = Series(d, index=self.ts.index)
    self.assertEqual(dates.dtype, 'M8[ns]')
    self.assertEqual(len(dates), len(self.ts))

    # GH12336
    # Test construction of categorical series from value
    categorical = Series(0, index=self.ts.index, dtype="category")
    expected = Series(0, index=self.ts.index).astype("category")
    self.assertEqual(categorical.dtype, 'category')
    self.assertEqual(len(categorical), len(self.ts))
    tm.assert_series_equal(categorical, expected)
def test_astype_datetimes(self):
    # astype('O') on datetime-like Series has to give object dtype
    import pandas.tslib as tslib

    def check_object_cast(series):
        converted = series.astype('O')
        self.assertEqual(converted.dtype, np.object_)

    # all-NaT series
    check_object_cast(Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)))

    # single datetime
    check_object_cast(Series([datetime(2001, 1, 2, 0, 0)]))

    # datetimes with one entry replaced by NaN stay M8[ns] until cast
    with_gap = Series([datetime(2001, 1, 2, 0, 0)] * 3)
    with_gap[1] = np.nan
    self.assertEqual(with_gap.dtype, 'M8[ns]')
    check_object_cast(with_gap)
def test_convert_objects_leave_decimal_alone(self):
    # Decimal results of a groupby aggregation must stay Decimal objects
    # in an object-dtype result.

    from decimal import Decimal

    def convert_fast(x):
        return Decimal(str(x.mean()))

    def convert_force_pure(x):
        # base will be length 0
        assert (len(x.base) > 0)
        return Decimal(str(x.mean()))

    labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O')
    grouped = Series(lrange(5)).groupby(labels)

    for aggregator in (convert_fast, convert_force_pure):
        result = grouped.agg(aggregator)
        self.assertEqual(result.dtype, np.object_)
        tm.assertIsInstance(result[0], Decimal)
def test_set_value_resize(self):
    # set_value with an unknown row label: the same frame object comes
    # back and the label is its last index entry
    enlarged = self.frame.set_value('foobar', 'B', 0)
    self.assertIs(enlarged, self.frame)
    self.assertEqual(enlarged.index[-1], 'foobar')
    self.assertEqual(enlarged.get_value('foobar', 'B'), 0)

    self.frame.loc['foobar', 'qux'] = 0
    self.assertEqual(self.frame.get_value('foobar', 'qux'), 0)

    # a string or a bool stored in column 'baz' gives object dtype
    for stored in ('sam', True):
        with_baz = self.frame.copy().set_value('foobar', 'baz', stored)
        self.assertEqual(with_baz['baz'].dtype, np.object_)

    # after an int was stored in 'baz', storing a string has to raise
    with_baz = self.frame.copy().set_value('foobar', 'baz', 5)
    self.assertRaises(ValueError, with_baz.set_value, 'foobar', 'baz', 'sam')
def test_stat_operators_attempt_obj_array(self):
    # Reductions on object-dtype frames must agree with the same
    # reductions on the float64 version of the frame.
    #
    # NOTE(review): the third value of 'a' and 'c' and the dtype argument
    # were cut off in the source. Three values per column are required by
    # the three index labels and dtype='O' by the object-dtype assertion
    # below; the two numbers themselves were restored from memory of the
    # upstream pandas test -- confirm.
    data = {
        'a': [-0.00049987540199591344, -0.0016467257772919831,
              0.00067695870775883013],
        'b': [-0, -0, 0.0],
        'c': [0.00031111847529610595, 0.0014902627951905339,
              -0.00094099200035979691],
    }
    df1 = DataFrame(data, index=['foo', 'bar', 'baz'],
                    dtype='O')
    methods = ['sum', 'mean', 'prod', 'var', 'std', 'skew', 'min', 'max']

    # GH #676
    df2 = DataFrame({0: [np.nan, 2], 1: [np.nan, 3],
                     2: [np.nan, 4]}, dtype=object)

    for df in [df1, df2]:
        for meth in methods:
            self.assertEqual(df.values.dtype, np.object_)
            result = getattr(df, meth)(1)
            expected = getattr(df.astype('f8'), meth)(1)

            if not tm._incompat_bottleneck_version(meth):
                assert_series_equal(result, expected)
def test_constructor_dict_cast(self):
    # DataFrame built from a dict of dicts, with and without dtype=float

    # cast float tests
    test_data = {
        'A': {'1': 1, '2': 2},
        'B': {'1': '1', '2': '2', '3': '3'},
    }
    frame = DataFrame(test_data, dtype=float)
    self.assertEqual(len(frame), 3)
    self.assertEqual(frame['B'].dtype, np.float64)
    self.assertEqual(frame['A'].dtype, np.float64)

    frame = DataFrame(test_data)
    self.assertEqual(len(frame), 3)
    self.assertEqual(frame['B'].dtype, np.object_)
    self.assertEqual(frame['A'].dtype, np.float64)

    # can't cast to float
    test_data = {
        'A': dict(zip(range(20), tm.makeStringIndex(20))),
        'B': dict(zip(range(15), randn(15))),
    }
    frame = DataFrame(test_data, dtype=float)
    self.assertEqual(len(frame), 20)
    self.assertEqual(frame['A'].dtype, np.object_)
    self.assertEqual(frame['B'].dtype, np.float64)
def test_transpose(self):
    # every cell of the transposed frame must equal the mirrored cell of
    # the original frame
    frame = self.frame
    dft = frame.T
    for idx, series in compat.iteritems(dft):
        for col, value in compat.iteritems(series):
            if np.isnan(value):
                # NaN never compares equal to itself, so a NaN cell is
                # checked with isnan instead of assertEqual
                self.assertTrue(np.isnan(frame[col][idx]))
            else:
                self.assertEqual(value, frame[col][idx])

    # mixed type
    index, data = tm.getMixedTypeDict()
    mixed = DataFrame(data, index=index)

    mixed_T = mixed.T
    for col, s in compat.iteritems(mixed_T):
        self.assertEqual(s.dtype, np.object_)
def test_nan_handling(self):
    # NOTE(review): the first argument of every assert_numpy_array_equal
    # call and the assignment target for s3 were missing from the source.
    # They were reconstructed from the expected arrays and the comments
    # (categories vs. integer labels) as `.cat.categories` and
    # `.values.codes` -- confirm against the upstream test.

    # Nans are represented as -1 in labels
    s = Series(Categorical(["a", "b", np.nan, "a"]))
    self.assert_numpy_array_equal(s.cat.categories, np.array(["a", "b"]))
    self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0]))

    # If categories have nan included, the label should point to that
    # instead
    with tm.assert_produces_warning(FutureWarning):
        s2 = Series(Categorical(
            ["a", "b", np.nan, "a"], categories=["a", "b", np.nan]))
    self.assert_numpy_array_equal(s2.cat.categories, np.array(
        ["a", "b", np.nan], dtype=np.object_))
    self.assert_numpy_array_equal(s2.values.codes, np.array([0, 1, 2, 0]))

    # Changing categories should also make the replaced category np.nan
    s3 = Series(Categorical(["a", "b", "c", "a"]))
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        s3.cat.categories = ["a", "b", np.nan]
    self.assert_numpy_array_equal(s3.cat.categories, np.array(
        ["a", "b", np.nan], dtype=np.object_))
    self.assert_numpy_array_equal(s3.values.codes, np.array([0, 1, 2, 0]))
def reset_minmax(self):
    """Recompute ``vmin`` / ``vmax`` from a sample of the values.

    Sets ``self.vmin`` and ``self.vmax`` to the smallest and largest finite
    color value and ``self.bgcolor_possible`` to True. When no usable value
    exists (empty data or non-numeric data), ``vmin`` and ``vmax`` are set to
    None and ``bgcolor_possible`` to False.
    """
    try:
        data = self.get_values(sample=True)
        color_value = self.color_func(data) if self.color_func is not None else data
        if color_value.dtype.type == np.object_:
            color_value = color_value[is_number_value(color_value)]
            # this is probably broken if we have complex numbers stored as objects but I don't foresee
            # this case happening anytime soon.
            color_value = color_value.astype(float)
        # ignore nan, -inf, inf (setting them to 0 or to very large numbers is not an option)
        color_value = color_value[np.isfinite(color_value)]
        self.vmin = float(np.min(color_value))
        self.vmax = float(np.max(color_value))
        self.bgcolor_possible = True
    # ValueError for empty arrays, TypeError for object/string arrays
    except (TypeError, ValueError):
        self.vmin = None
        self.vmax = None
        self.bgcolor_possible = False
def normalize_attr_strings(a: np.ndarray) -> np.ndarray:
    """
    Take an np.ndarray of all kinds of string-like elements, and return an array of ascii (np.bytes_) objects

    Text elements are encoded to ascii; characters outside ascii are replaced
    by XML character references (e.g. "é" becomes b"&#233;").

    Args:
        a: array of object dtype (holding only text or only bytes elements),
           of bytes dtype, or of unicode dtype

    Returns:
        An array of ascii byte strings. An array that already has a bytes
        dtype is returned as is.

    Raises:
        ValueError: if an object array holds anything but strings, or if the
            dtype is neither object, bytes nor unicode.
    """
    def _encode_all(values):
        return np.array([x.encode('ascii', 'xmlcharrefreplace') for x in values])

    if np.issubdtype(a.dtype, np.object_):
        # isinstance also covers the numpy scalar types, which subclass
        # str / bytes, and arrays mixing Python and numpy scalars
        if all(isinstance(x, str) for x in a):
            return _encode_all(a)
        if all(isinstance(x, bytes) for x in a):
            return a.astype(np.bytes_)
        raise ValueError("Arbitrary numpy object arrays not supported (all elements must be string objects).")
    if np.issubdtype(a.dtype, np.bytes_):
        return a
    if np.issubdtype(a.dtype, np.str_):
        return _encode_all(a)
    raise ValueError("String values must be object, ascii or unicode.")
def numpy_to_transform(arr):
    """Convert 4x4 transformation matrices to Transform objects.

    :param arr: array whose last two dimensions are (4, 4)
    :return: a single Transform for a plain 4x4 matrix, otherwise an object
        array of Transforms shaped like the leading dimensions of ``arr``

    NOTE(review): the arguments of both Transform(...) calls, the ``else:``
    line and the final return were missing from the source. They were
    reconstructed assuming the geometry_msgs layout
    Transform(translation=Vector3, rotation=Quaternion) -- confirm.
    """
    from tf import transformations

    shape, rest = arr.shape[:-2], arr.shape[-2:]
    assert rest == (4,4)

    if len(shape) == 0:
        trans = transformations.translation_from_matrix(arr)
        quat = transformations.quaternion_from_matrix(arr)

        return Transform(
            translation=Vector3(*trans),
            rotation=Quaternion(*quat)
        )
    else:
        # one Transform per matrix in the stack
        res = np.empty(shape, dtype=np.object_)
        for idx in np.ndindex(shape):
            res[idx] = Transform(
                translation=Vector3(*transformations.translation_from_matrix(arr[idx])),
                rotation=Quaternion(*transformations.quaternion_from_matrix(arr[idx]))
            )
        return res
项目:ros_numpy    作者:eric-wieser    | 项目源码 | 文件源码
def numpy_to_pose(arr):
    """Convert 4x4 transformation matrices to Pose objects.

    :param arr: array whose last two dimensions are (4, 4)
    :return: a single Pose for a plain 4x4 matrix, otherwise an object array
        of Poses shaped like the leading dimensions of ``arr``

    NOTE(review): the arguments of both Pose(...) calls, the ``else:`` line
    and the final return were missing from the source. They were
    reconstructed assuming the geometry_msgs layout
    Pose(position=Point, orientation=Quaternion) -- confirm.
    """
    from tf import transformations

    shape, rest = arr.shape[:-2], arr.shape[-2:]
    assert rest == (4,4)

    if len(shape) == 0:
        trans = transformations.translation_from_matrix(arr)
        quat = transformations.quaternion_from_matrix(arr)

        return Pose(
            position=Point(*trans),
            orientation=Quaternion(*quat)
        )
    else:
        # one Pose per matrix in the stack
        res = np.empty(shape, dtype=np.object_)
        for idx in np.ndindex(shape):
            res[idx] = Pose(
                position=Point(*transformations.translation_from_matrix(arr[idx])),
                orientation=Quaternion(*transformations.quaternion_from_matrix(arr[idx]))
            )
        return res
def initialize(self):
    """Initialize HistogramFillerBase

    Checks ``read_key`` / ``store_key``, wraps single column names in
    ``self.columns`` into lists and normalizes the entries of
    ``self.var_dtype`` to numpy scalar types (``str`` for byte-string and
    object types).

    :returns: StatusCode.Success
    :raises AssertionError: if read_key or store_key is not a non-empty string
    :raises TypeError: if an entry of columns is neither a string nor a list
    :raises RuntimeError: if a var_dtype entry cannot be turned into a numpy dtype
    """
    # check basic attribute settings
    # (explicit raises keep the checks alive when Python runs with -O)
    if not (isinstance(self.read_key, str) and len(self.read_key)):
        raise AssertionError('read_key has not been set correctly')
    if self.store_key is not None:
        if not (isinstance(self.store_key, str) and len(self.store_key)):
            raise AssertionError('store_key has not been set to string')

    # default histogram creation is at execute(). Storage at finalize is useful for
    # looping over datasets.
    if self.store_at_finalize:
        self.log().debug('Storing (and possible post-processing) at finalize, not execute')

    # check that columns are set correctly.
    for i, c in enumerate(self.columns):
        if isinstance(c, str):
            self.columns[i] = [c]
        if not isinstance(self.columns[i], list):
            raise TypeError('columns "{}" needs to be a string or list of strings'.format(self.columns[i]))

    # check for supported data types
    for k in self.var_dtype.keys():
        try:
            self.var_dtype[k] = np.dtype(self.var_dtype[k]).type
            # np.bytes_ is the spelling of np.string_ that still exists in NumPy 2
            if self.var_dtype[k] is np.bytes_ or self.var_dtype[k] is np.object_:
                self.var_dtype[k] = str
        except Exception as exc:
            raise RuntimeError('unknown assigned datatype to variable "{}"'.format(k)) from exc

    return StatusCode.Success
def initialize(self):
    """Initialize FixPandasDataFrame

    Checks the argument types, settles ``store_key`` / ``inplace`` and
    normalizes the entries of ``self.var_dtype`` to numpy scalar types
    (``str`` for string and object types).

    :returns: StatusCode.Success
    :raises AssertionError: if cleanup_string_columns is neither a list nor a bool
    :raises TypeError: if a var_dtype entry cannot be turned into a numpy dtype
    """
    self.check_arg_types(read_key=str, store_key=str)
    self.check_arg_types(recurse=True, allow_none=True, original_columns=str)

    if not isinstance(self.cleanup_string_columns, (list, bool)):
        raise AssertionError('cleanup_string_columns should be a list of column names or boolean.')

    if self.read_key == self.store_key:
        self.inplace = True
        self.log().debug('store_key equals read_key; inplace has been set to "True"')

    if self.inplace:
        self.store_key = self.read_key
        self.log().debug('store_key has been set to read_key "%s"', self.store_key)

    if not self.store_key:
        self.store_key = self.read_key + '_fix'
        self.log().debug('store_key has been set to "%s"', self.store_key)

    # check data types
    for k in self.var_dtype.keys():
        if k not in self.contaminated_columns:
            # NOTE(review): the body of this guard and the `try:` line were
            # missing from the source. Registering the column is the
            # reconstruction that matches the guard; append() assumes
            # contaminated_columns is a list -- confirm.
            self.contaminated_columns.append(k)
        try:
            # convert to consistent types
            dt = np.dtype(self.var_dtype[k]).type
            if dt is np.str_ or dt is np.object_:
                dt = str
            self.var_dtype[k] = dt
        except Exception as exc:
            raise TypeError('unknown assigned datatype to variable "%s"' % k) from exc

    # NOTE(review): no return statement is visible in the source; added to
    # match the initialize() of HistogramFillerBase -- confirm.
    return StatusCode.Success

def test_splitlines(self):
        A = np.char.array(['abc\nfds\nwer']).splitlines()
        assert_(issubclass(A.dtype.type, np.object_))
        assert_(A.shape == (1,))
        assert_(len(A[0]) == 3)
项目:radar    作者:amoose136    | 项目源码 | 文件源码
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old = False):
    """Split one column of a dataframe into multiple columns.

    dataframe : pandas dataframe to be processed; it is not modified
    split_col : name of the column to be split
    split_char : separator to split the column on
    new_colnames : list of names for the new columns
    delete_old : if true, the original column is left out of the result

    Returns a new dataframe: the input (without split_col when delete_old is
    set) joined with the new columns.
    """
    import pandas as pd
    import numpy as np

    # One row per input value, one column per piece. pd.Series is the only
    # argument on purpose: a second positional argument is not an axis for
    # Series.apply, it lands on the deprecated convert_dtype option.
    new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series)
    # if all values were NaN, no split occurred, only one col exists still:
    # create the missing cols, fill with NaN
    for missing_name in new_colnames[len(new_cols.columns):]:
        new_cols[missing_name] = np.nan
    # rename the cols
    new_cols.columns = new_colnames
    # leave the original column out of the result without touching the
    # caller's dataframe
    if delete_old:
        dataframe = dataframe.drop(split_col, axis=1)
    # merge with df
    return dataframe.join(new_cols)
项目:reportIT    作者:stevekm    | 项目源码 | 文件源码
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old = False):
    """Split one column of a dataframe into multiple columns.

    dataframe : pandas dataframe to be processed; it is not modified
    split_col : name of the column to be split
    split_char : separator to split the column on
    new_colnames : list of names for the new columns
    delete_old : if true, the original column is left out of the result

    Returns a new dataframe: the input (without split_col when delete_old is
    set) joined with the new columns.
    """
    import pandas as pd
    import numpy as np

    # One row per input value, one column per piece. pd.Series is the only
    # argument on purpose: a second positional argument is not an axis for
    # Series.apply, it lands on the deprecated convert_dtype option.
    new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series)
    # if all values were NaN, no split occurred, only one col exists still:
    # create the missing cols, fill with NaN
    for missing_name in new_colnames[len(new_cols.columns):]:
        new_cols[missing_name] = np.nan
    # rename the cols
    new_cols.columns = new_colnames
    # leave the original column out of the result without touching the
    # caller's dataframe
    if delete_old:
        dataframe = dataframe.drop(split_col, axis=1)
    # merge with df
    return dataframe.join(new_cols)
项目:reportIT    作者:stevekm    | 项目源码 | 文件源码
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old = False):
    """Split one column of a dataframe into multiple columns.

    dataframe : pandas dataframe to be processed; it is not modified
    split_col : name of the column to be split
    split_char : separator to split the column on
    new_colnames : list of names for the new columns
    delete_old : if true, the original column is left out of the result

    Returns a new dataframe: the input (without split_col when delete_old is
    set) joined with the new columns.
    """
    import pandas as pd
    import numpy as np

    # One row per input value, one column per piece. pd.Series is the only
    # argument on purpose: a second positional argument is not an axis for
    # Series.apply, it lands on the deprecated convert_dtype option.
    new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series)
    # if all values were NaN, no split occurred, only one col exists still:
    # create the missing cols, fill with NaN
    for missing_name in new_colnames[len(new_cols.columns):]:
        new_cols[missing_name] = np.nan
    # rename the cols
    new_cols.columns = new_colnames
    # leave the original column out of the result without touching the
    # caller's dataframe
    if delete_old:
        dataframe = dataframe.drop(split_col, axis=1)
    # merge with df
    return dataframe.join(new_cols)
项目:reportIT    作者:stevekm    | 项目源码 | 文件源码
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old = False):
    """Split one column of a dataframe into multiple columns.

    dataframe : pandas dataframe to be processed; it is not modified
    split_col : name of the column to be split
    split_char : separator to split the column on
    new_colnames : list of names for the new columns
    delete_old : if true, the original column is left out of the result

    Returns a new dataframe: the input (without split_col when delete_old is
    set) joined with the new columns.
    """
    import pandas as pd
    import numpy as np

    # One row per input value, one column per piece. pd.Series is the only
    # argument on purpose: a second positional argument is not an axis for
    # Series.apply, it lands on the deprecated convert_dtype option.
    new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series)
    # if all values were NaN, no split occurred, only one col exists still:
    # create the missing cols, fill with NaN
    for missing_name in new_colnames[len(new_cols.columns):]:
        new_cols[missing_name] = np.nan
    # rename the cols
    new_cols.columns = new_colnames
    # leave the original column out of the result without touching the
    # caller's dataframe
    if delete_old:
        dataframe = dataframe.drop(split_col, axis=1)
    # merge with df
    return dataframe.join(new_cols)
项目:reportIT    作者:stevekm    | 项目源码 | 文件源码
def split_df_col2cols(dataframe, split_col, split_char, new_colnames, delete_old = False):
    """Split one column of a dataframe into multiple columns.

    dataframe : pandas dataframe to be processed; it is not modified
    split_col : name of the column to be split
    split_char : separator to split the column on
    new_colnames : list of names for the new columns
    delete_old : if true, the original column is left out of the result

    Returns a new dataframe: the input (without split_col when delete_old is
    set) joined with the new columns.
    """
    import pandas as pd
    import numpy as np

    # One row per input value, one column per piece. pd.Series is the only
    # argument on purpose: a second positional argument is not an axis for
    # Series.apply, it lands on the deprecated convert_dtype option.
    new_cols = dataframe[split_col].astype(np.object_).str.split(split_char).apply(pd.Series)
    # if all values were NaN, no split occurred, only one col exists still:
    # create the missing cols, fill with NaN
    for missing_name in new_colnames[len(new_cols.columns):]:
        new_cols[missing_name] = np.nan
    # rename the cols
    new_cols.columns = new_colnames
    # leave the original column out of the result without touching the
    # caller's dataframe
    if delete_old:
        dataframe = dataframe.drop(split_col, axis=1)
    # merge with df
    return dataframe.join(new_cols)
def test_converters_cornercases(self):
