Python pandas 模块,MultiIndex() 实例源码

我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用pandas.MultiIndex()

项目:fireant    作者:kayak    | 项目源码 | 文件源码
def _set_display_options(self, dataframe, display_schema):
        """
        Replaces the dimension options with those that the user has specified manually e.g. change 'm' to 'mobile'
        """
        dataframe = dataframe.copy()

        for key, dimension in display_schema['dimensions'].items():
            if 'display_options' in dimension:
                display_values = [dimension['display_options'].get(value, value)
                                  for value in dataframe.index.get_level_values(key).unique()]

                if not display_values:
                    continue

                if isinstance(dataframe.index, pd.MultiIndex):
                    dataframe.index.set_levels(display_values, key, inplace=True)

                else:
                    dataframe.index = pd.Index(display_values)

        return dataframe
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_three_iterations_no_metadata(self):
        """Summarise three iterations per depth when no metadata counts are supplied."""
        depth_iter = pd.MultiIndex.from_product([[1, 200], [1, 2, 3]],
                                                names=['depth', 'iter'])
        samples = ['S1', 'S2', 'S3']
        rarefied = pd.DataFrame(data=[[1, 2, 3, 4, 5, 6] for _ in samples],
                                columns=depth_iter, index=samples)

        # No counts provided because no metadata
        observed = _compute_summary(rarefied, 'sample-id')

        # Every sample holds identical values, so the expected statistics
        # (count first, then min ... max) depend on the depth only.
        stats_by_depth = [
            (1, [1, 1., 1.04, 1.18, 1.5, 2., 2.5, 2.82, 2.96, 3.]),
            (200, [1, 4., 4.04, 4.18, 4.5, 5., 5.5, 5.82, 5.96, 6.]),
        ]
        rows = [[sample, depth] + stats
                for sample in samples
                for depth, stats in stats_by_depth]
        header = ['sample-id', 'depth', 'count', 'min', '2%', '9%', '25%',
                  '50%', '75%', '91%', '98%', 'max']
        expected = pd.DataFrame(data=rows, columns=header)
        pdt.assert_frame_equal(expected, observed)
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_two_iterations_with_metadata_were_values_are_identical(self):
        """Summarise one metadata group over two iterations, with counts supplied."""
        depth_iter = pd.MultiIndex.from_product([[1, 200], [1, 2]],
                                                names=['depth', 'iter'])
        group = ['milo']
        values = pd.DataFrame(data=[[3, 6, 9, 9]], columns=depth_iter,
                              index=group)
        group_sizes = pd.DataFrame(data=[[3, 3, 3, 3]], columns=depth_iter,
                                   index=group)

        observed = _compute_summary(values, 'pet', counts=group_sizes)

        header = ['pet', 'depth', 'min', '2%', '9%', '25%', '50%', '75%',
                  '91%', '98%', 'max', 'count']
        rows = [
            ['milo', 1, 3., 3.06, 3.27, 3.75, 4.5, 5.25, 5.73, 5.94, 6., 3],
            # Both iterations at depth 200 are 9, so every statistic is 9.
            ['milo', 200] + [9.] * 9 + [3],
        ]
        expected = pd.DataFrame(data=rows, columns=header)
        pdt.assert_frame_equal(expected, observed)
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_some_duplicates_in_category(self):
        """Two of the three samples share the same 'pet' value."""
        input_columns = pd.MultiIndex.from_tuples(
            [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', '')],
            names=['depth', 'iter'])
        input_rows = [[1, 2, 3, 4, 'russ'],
                      [5, 6, 7, 8, 'milo'],
                      [9, 10, 11, 12, 'russ']]
        data = pd.DataFrame(data=input_rows, columns=input_columns,
                            index=['S1', 'S2', 'S3'])

        result = _reindex_with_metadata('pet', ['pet'], data)

        expected_columns = pd.MultiIndex(
            levels=[[1, 200, 'pet'], [1, 2, '']],
            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=['depth', 'iter'])
        expected_index = pd.Index(['milo', 'russ'], name='pet')

        def expected_frame(rows):
            # Both expected frames share the same axes.
            return pd.DataFrame(data=rows, columns=expected_columns,
                                index=expected_index)

        pdt.assert_frame_equal(
            expected_frame([[5, 6, 7, 8], [5, 6, 7, 8]]), result[0])
        pdt.assert_frame_equal(
            expected_frame([[1, 1, 1, 1], [2, 2, 2, 2]]), result[1])
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_all_identical(self):
        """All three samples share the same 'pet' value."""
        input_columns = pd.MultiIndex.from_tuples(
            [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', '')],
            names=['depth', 'iter'])
        input_rows = [[1, 2, 3, 4, 'russ'],
                      [5, 6, 7, 8, 'russ'],
                      [9, 10, 11, 12, 'russ']]
        data = pd.DataFrame(data=input_rows, columns=input_columns,
                            index=['S1', 'S2', 'S3'])

        result = _reindex_with_metadata('pet', ['pet'], data)

        expected_columns = pd.MultiIndex(
            levels=[[1, 200, 'pet'], [1, 2, '']],
            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=['depth', 'iter'])
        expected_index = pd.Index(['russ'], name='pet')

        def expected_frame(rows):
            # Both expected frames share the same axes.
            return pd.DataFrame(data=rows, columns=expected_columns,
                                index=expected_index)

        pdt.assert_frame_equal(expected_frame([[5, 6, 7, 8]]), result[0])
        pdt.assert_frame_equal(expected_frame([[3, 3, 3, 3]]), result[1])
项目:feagen    作者:ianlini    | 项目源码 | 文件源码
def write_data(self, result_dict):
        """Validate every result and store it in ``self.hdf_store``.

        Each value of ``result_dict`` must be a pandas DataFrame or Series
        without missing values; anything else raises ``ValueError``. A
        DataFrame whose index and columns are both MultiIndex is stored with
        the store's default format, everything else with ``format='table'``.
        The store is flushed (with fsync) after all keys were written.
        """
        for key, result in six.iteritems(result_dict):
            if isinstance(result, pd.DataFrame):
                has_nan = result.isnull().any().any()
            elif isinstance(result, pd.Series):
                has_nan = result.isnull().any()
            else:
                raise ValueError("PandasHDFDataHandler doesn't support type "
                                 "{} (in key {})".format(type(result), key))
            if has_nan:
                raise ValueError("data {} have nan".format(key))

            timer_message = "Writing generated data {} to hdf5 file".format(key)
            with SimpleTimer(timer_message, end_in_new_line=False):
                fully_hierarchical = (
                    isinstance(result, pd.DataFrame)
                    and isinstance(result.index, pd.MultiIndex)
                    and isinstance(result.columns, pd.MultiIndex))
                put_options = {} if fully_hierarchical else {'format': 'table'}
                self.hdf_store.put(key, result, **put_options)
        self.hdf_store.flush(fsync=True)
项目:kaggle-seizure-prediction    作者:sics-lm    | 项目源码 | 文件源码
def merge_interictal_preictal(interictal, preictal):
    """
    Merges the *interictal* and *preictal* data frames to a single data frame. Also sorts the multilevel index.

    Both input frames are sorted in place (rows by the 'segment' level and, when the columns are a
    MultiIndex, the columns as well) before they are concatenated.

    :param interictal: A data frame containing the interictal samples.
    :param preictal: A data frame containing the preictal samples.
    :return: A data frame containing both interictal and preictal data. The multilevel index of the data frame
             is sorted.
    """

    logging.info("Merging interictal and preictal datasets")
    try:
        # Preictal first, then interictal. sort_index(level=...) replaces DataFrame.sortlevel,
        # which is no longer available in current pandas releases.
        for frame in (preictal, interictal):
            frame.sort_index(level='segment', inplace=True)
            if isinstance(frame.columns, pd.MultiIndex):
                frame.sort_index(axis=1, level=0, inplace=True)
    except TypeError:
        # Best effort: the merged frame is still built and sorted below.
        logging.warning("TypeError when trying to merge interictal and preictal sets.")

    dataset = pd.concat((interictal, preictal))
    dataset.sort_index(level='segment', inplace=True)
    return dataset
项目:kaggle-seizure-prediction    作者:sics-lm    | 项目源码 | 文件源码
def test_k_fold_segment_split():
    """ Test function for the k-fold segment split """
    interictal_classes = np.zeros(120)
    preictal_classes = np.ones(120)
    classes = np.concatenate((interictal_classes, preictal_classes,))
    segments = np.arange(12)
    i = np.arange(240)

    index = pd.MultiIndex.from_product([segments, np.arange(20)], names=('segment', 'start_sample'))

    dataframe = pd.DataFrame({'Preictal': classes, 'i': i}, index=index)

    # With a 6-fold cross validator, we expect each held-out fold to contain exactly 2 segments, one from each class
    cv1 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)
    cv2 = SegmentCrossValidator(dataframe, n_folds=6, shuffle=True, random_state=42)

    # Two validators built with the same random_state must yield identical folds.
    for (training_fold1, test_fold1), (training_fold2, test_fold2) in zip(cv1, cv2):
        # Compare the first validator against the second one; comparing a fold
        # with itself would always pass.
        assert np.all(training_fold1 == training_fold2)
        assert np.all(test_fold1 == test_fold2)
项目:kaggle-seizure-prediction    作者:sics-lm    | 项目源码 | 文件源码
def load_preictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
    """
    Convenience function for loading preictal dataframes. Sets the 'Preictal' column to 1.
    :param feature_folder: The folder to load the feature data from.
    :param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
    :param kwargs: keyword arguments to use for loading the features.
    :return: A DataFrame of preictal data with a 'Preictal' column set to 1.
    """
    preictal = load_feature_files(feature_folder,
                                  class_name="preictal",
                                  sliding_frames=sliding_frames,
                                  **kwargs)
    preictal['Preictal'] = 1
    # sort_index(level=...) replaces DataFrame.sortlevel, which is no longer
    # available in current pandas releases.
    preictal.sort_index(level='segment', inplace=True)
    if isinstance(preictal.columns, pd.MultiIndex):
        preictal.sort_index(axis=1, level=0, inplace=True)
    return preictal
项目:kaggle-seizure-prediction    作者:sics-lm    | 项目源码 | 文件源码
def load_interictal_dataframes(feature_folder, sliding_frames=False, **kwargs):
    """
    Convenience function for loading interictal dataframes. Sets the 'Preictal' column to 0.
    :param feature_folder: The folder to load the feature data from.
    :param sliding_frames: If True, the data frame will be extended using sliding frames over the feature windows.
    :param kwargs: keyword arguments to use for loading the features.
    :return: A DataFrame of interictal data with a 'Preictal' column set to 0.
    """

    # Load the interictal class; requesting "preictal" here would label
    # preictal samples with Preictal == 0.
    interictal = load_feature_files(feature_folder,
                                    class_name="interictal",
                                    sliding_frames=sliding_frames,
                                    **kwargs)
    interictal['Preictal'] = 0
    # sort_index(level=...) replaces DataFrame.sortlevel, which is no longer
    # available in current pandas releases.
    interictal.sort_index(level='segment', inplace=True)
    if isinstance(interictal.columns, pd.MultiIndex):
        interictal.sort_index(axis=1, level=0, inplace=True)
    return interictal
项目:kaggle-seizure-prediction    作者:sics-lm    | 项目源码 | 文件源码
def create_sliding_frames(dataframe, frame_length=12):
    """
    Data-frame front end for extend_data_with_sliding_frames, which operates on numpy arrays.

    The values of *dataframe* are handed to that helper and the result is wrapped in a new
    data frame whose columns are the product of ``range(frame_length)`` and the original columns.
    The index of the original data frame is not carried over.

    :param dataframe: The dataframe to extend.
    :param frame_length: The frame length to use in the resulting extended data frame.
    :return: A new data frame where the original dataframe has been extended with sliding frames.
    """
    # NOTE(review): frame_length is only used for the column labels and is not
    # forwarded to extend_data_with_sliding_frames -- confirm the helper's
    # default matches, otherwise the shapes will not line up.
    frames = extend_data_with_sliding_frames(dataframe.values)

    # The original feature columns are kept as the inner level; otherwise
    # concatenating different data frames along the row axis would give
    # wrong results.
    frame_columns = pd.MultiIndex.from_product(
        [range(frame_length), dataframe.columns],
        names=['window', 'feature'])
    return pd.DataFrame(data=frames, columns=frame_columns)
项目:meterstick    作者:google    | 项目源码 | 文件源码
def testTwoDimensionalCumulativeDistribution(self):
    """CumulativeDistribution of X over the dimensions Y and Z."""
    df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                       "Y": [1, 2, 0, 1, 1, 1, 1],
                       "Z": [1, 0, 0, 0, 0, 0, 0]})
    weights = np.array([1, 1, 1, 1, 1, 1, 1])
    metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
    output = metric(df, weights)
    # Built with from_tuples, which exists in every pandas release; the
    # `labels` keyword of the MultiIndex constructor does not.
    expected_index = pd.MultiIndex.from_tuples(
        [(0, 0), (1, 0), (1, 1), (2, 0)], names=["Y", "Z"])
    correct = pd.DataFrame(
        np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
        columns=[""],
        index=expected_index)
    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    all(abs(output.values - correct.values) < 1e-10))
项目:meterstick    作者:google    | 项目源码 | 文件源码
def testShuffledTwoDimensionalCumulativeDistribution(self):
    """CumulativeDistribution must not depend on the row order of the input."""
    df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                       "Y": [1, 2, 0, 1, 1, 1, 1],
                       "Z": [1, 0, 0, 0, 0, 0, 0]})
    weights = np.array([1, 1, 1, 1, 1, 1, 1])
    metric = metrics.CumulativeDistribution("X", ["Y", "Z"])
    # Rows are randomly permuted; all weights are equal, so they need no reordering.
    output = metric(df.iloc[np.random.permutation(7)], weights)
    # Built with from_tuples, which exists in every pandas release; the
    # `labels` keyword of the MultiIndex constructor does not.
    expected_index = pd.MultiIndex.from_tuples(
        [(0, 0), (1, 0), (1, 1), (2, 0)], names=["Y", "Z"])
    correct = pd.DataFrame(
        np.array([1 / 14., 12 / 14., 13 / 14., 1.]),
        columns=[""],
        index=expected_index)
    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    all(abs(output.values - correct.values) < 1e-10))
项目:meterstick    作者:google    | 项目源码 | 文件源码
def testRelativeToSplitJackknife(self):
    """Sum(X) split by Y, compared against Z == 0, with jackknife standard errors."""
    data = pd.DataFrame(
        {"X": [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8],
         "Y": [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3],
         "Z": [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]})

    metric = metrics.Sum("X")
    comparison = comparisons.AbsoluteDifference("Z", 0)
    se_method = standard_errors.Jackknife()
    output = core.Analyze(data).split_by("Y").relative_to(
        comparison).with_standard_errors(se_method).calculate(metric).run()

    # Built with from_tuples, which exists in every pandas release; the
    # `labels` keyword of the MultiIndex constructor does not.
    rowindex = pd.MultiIndex.from_tuples(
        [(1, 1), (2, 1), (3, 1)],
        names=["Y", "Z"])
    correct = pd.DataFrame(
        np.array([[-3.0, np.sqrt(5 * np.var([0, -1, -2, -3, -4, -5]))],
                  [-3.0, np.sqrt(5 * np.var([3, 2, 1, -8, -7, -6]))],
                  [-3.0, np.sqrt(5 * np.var([6, 5, 4, -11, -10, -9]))]]),
        columns=("sum(X) Absolute Difference",
                 "sum(X) Absolute Difference Jackknife SE"),
        index=rowindex)

    self.assertTrue(output.equals(correct))
项目:meterstick    作者:google    | 项目源码 | 文件源码
def testDataframeRelativeTo(self):
    """Distribution of X over Z, compared against the Y == 0 baseline."""
    df = pd.DataFrame({"X": range(11),
                       "Y": np.concatenate((np.zeros(6), np.ones(5))),
                       "Z": np.concatenate((np.zeros(3), np.ones(8)))})

    metric = metrics.Distribution("X", ["Z"])
    output = core.Analyze(df).relative_to(comparisons.AbsoluteDifference(
        "Y", 0)).calculate(metric).run()

    # Built with from_tuples, which exists in every pandas release; the
    # `labels` keyword of the MultiIndex constructor does not.
    expected_index = pd.MultiIndex.from_tuples(
        [(1., 0.), (1., 1.)], names=["Y", "Z"])
    correct = pd.DataFrame(
        np.array([-0.2, 0.2]),
        columns=["X Distribution Absolute Difference"],
        index=expected_index)

    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
项目:meterstick    作者:google    | 项目源码 | 文件源码
def testSplitDataframe(self):
    """Distribution of X over Z, computed separately for every value of Y."""
    df = pd.DataFrame({"X": range(11),
                       "Y": np.concatenate((np.zeros(6), np.ones(5))),
                       "Z": np.concatenate((np.zeros(3), np.ones(8)))})

    metric = metrics.Distribution("X", ["Z"])
    output = core.Analyze(df).split_by(["Y"]).calculate(metric).run()

    # Built with from_product, which exists in every pandas release; the
    # `labels` keyword of the MultiIndex constructor does not.
    expected_index = pd.MultiIndex.from_product(
        [[0.0, 1.0], [0.0, 1.0]], names=["Y", "Z"])
    correct = pd.DataFrame(
        np.array([0.2, 0.8, 0.0, 1.0]),
        columns=["X Distribution"],
        index=expected_index)

    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def _isnull_old(obj):
    """Detect missing values. Treat None, NaN, INF, -INF as null.

    Parameters
    ----------
    obj: ndarray or object value

    Returns
    -------
    boolean ndarray or boolean

    Raises
    ------
    NotImplementedError
        If ``obj`` is a MultiIndex.
    """
    if lib.isscalar(obj):
        return lib.checknull_old(obj)

    # hack (for now) because MI registers as ndarray, so it has to be
    # rejected before the generic array-like check below
    if isinstance(obj, pd.MultiIndex):
        raise NotImplementedError("isnull is not defined for MultiIndex")

    if isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
        return _isnull_ndarraylike_old(obj)

    if isinstance(obj, ABCGeneric):
        return obj._constructor(obj._data.isnull(func=_isnull_old))

    if isinstance(obj, list) or hasattr(obj, '__array__'):
        return _isnull_ndarraylike_old(np.asarray(obj))

    return obj is None
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_equals_op_multiindex(self):
        """Element-wise ``==`` between a MultiIndex and other indexes."""
        # GH9785
        # test comparisons of multiindex
        from pandas.compat import StringIO
        csv_text = 'a,b,c\n1,2,3\n4,5,6'
        df = pd.read_csv(StringIO(csv_text), index_col=[0, 1])
        tm.assert_numpy_array_equal(df.index == df.index,
                                    np.array([True, True]))

        # Same length: the comparison is done entry by entry.
        same_length_cases = [
            ([(1, 2), (4, 5)], [True, True]),
            ([(1, 2), (4, 6)], [True, False]),
        ]
        for tuples, expected in same_length_cases:
            other = MultiIndex.from_tuples(tuples)
            tm.assert_numpy_array_equal(df.index == other,
                                        np.array(expected))

        # Different length: the comparison is rejected.
        longer = MultiIndex.from_tuples([(1, 2), (4, 5), (8, 9)])
        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
            df.index == longer

        flat = Index(['foo', 'bar', 'baz'])
        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
            df.index == flat
        tm.assert_numpy_array_equal(flat == longer,
                                    np.array([False, False, False]))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_stack_ints(self):
        """Stacking several integer levels at once equals stacking them one by one."""
        column_tuples = list(itertools.product(range(3), repeat=3))
        df = DataFrame(np.random.randn(30, 27),
                       columns=MultiIndex.from_tuples(column_tuples))

        # Positive and negative level numbers address the same two levels.
        for levels in ([1, 2], [-2, -1]):
            assert_frame_equal(
                df.stack(level=levels),
                df.stack(level=1).stack(level=1)
            )

        # Same check when the level names are themselves integers.
        df_named = df.copy()
        df_named.columns.set_names(range(3), inplace=True)
        assert_frame_equal(
            df_named.stack(level=[1, 2]),
            df_named.stack(level=1).stack(level=1)
        )
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_unstack_level_binding(self):
        """Unstack two levels of a three-level Series, then stack one of them back."""
        # GH9856
        source_levels = [[u('foo'), u('bar')],
                         [u('one'), u('two')],
                         [u('a'), u('b')]]
        source_labels = [[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]]
        mi = pd.MultiIndex(
            levels=source_levels,
            labels=source_labels,
            names=[u('first'), u('second'), u('third')])
        s = pd.Series(0, index=mi)
        result = s.unstack([1, 2]).stack(0)

        expected_mi = pd.MultiIndex(
            levels=[['foo', 'bar'], ['one', 'two']],
            labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
            names=['first', 'second'])
        expected_values = np.array([[np.nan, 0],
                                    [0, np.nan],
                                    [np.nan, 0],
                                    [0, np.nan]],
                                   dtype=np.float64)
        expected_columns = pd.Index(['a', 'b'], name='third')
        expected = pd.DataFrame(expected_values,
                                index=expected_mi,
                                columns=expected_columns)

        assert_frame_equal(result, expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_unstack_to_series(self):
        """Unstacking a flat-indexed frame gives a Series and can be undone."""
        # check reversibility
        unstacked = self.frame.unstack()
        self.assertTrue(isinstance(unstacked, Series))
        assert_frame_equal(unstacked.unstack().T, self.frame)

        # check NA handling
        data = DataFrame({'x': [1, 2, np.NaN], 'y': [3.0, 4, np.NaN]})
        data.index = Index(['a', 'b', 'c'])
        result = data.unstack()

        expected_index = MultiIndex(
            levels=[['x', 'y'], ['a', 'b', 'c']],
            labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
        expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=expected_index)
        assert_series_equal(result, expected)

        # check composability of unstack: four applications in a row give
        # back the original frame
        original = data.copy()
        for _ in range(4):
            data = data.unstack()
        assert_frame_equal(original, data)
项目:powerplantmatching    作者:FRESNA    | 项目源码 | 文件源码
def bar_fueltype_and_country_totals(dfs, keys, figsize=(12,8)):
    """Draw one bar chart per country on a grid of subplots.

    The frame is obtained from ``lookup(dfs, keys)``; the countries are the
    first column level when the columns are a MultiIndex, otherwise the
    columns themselves. The grid shape comes from ``gather_nrows_ncols``.

    Returns the figure together with the axes that was drawn last (or the
    unmodified result of ``plt.subplots`` when there is no country at all).
    """
    df = lookup(dfs, keys)
    if isinstance(df.columns, pd.MultiIndex):
        countries = df.columns.levels[0]
    else:
        countries = df.columns
    grid_shape = gather_nrows_ncols(len(countries))
    fig, axes = plt.subplots(*grid_shape, figsize=figsize)

    # plt.subplots gives a bare Axes for a 1x1 grid and an array otherwise;
    # only the bare Axes needs wrapping before it can be iterated.
    if sum(grid_shape) > 2:
        remaining_axes = axes.flat
    else:
        remaining_axes = np.array(axes).flat

    current = axes
    for country in countries:
        current = next(remaining_axes)
        df[country].plot.bar(ax=current, sharex=True, rot=55, legend=None)
        current.ticklabel_format(axis='y', style='sci', scilimits=(-2,2))
        current.set_title(country)
        fig.tight_layout(pad=0.5)
    return fig, current
项目:PyBloqs    作者:manahl    | 项目源码 | 文件源码
def _get_header_iterable(self):
        """Reformats all but the last header rows.

        Columns whose first-level value equals ORG_ROW_NAMES are left out.
        For MultiIndex columns one entry per column level is returned: in
        every level but the last, runs of consecutive equal labels are
        grouped into sub-lists; the last level is returned as one flat list.
        For flat columns a single-element list holding the column labels is
        returned.
        """
        df_clean = self.df.loc[:, self.df.columns.get_level_values(0) != ORG_ROW_NAMES]
        if not isinstance(df_clean.columns, pd.MultiIndex):
            return [df_clean.columns.tolist()]

        # zip() returns a one-shot iterator without len() on Python 3, so it
        # has to be materialised before its length can be taken.
        header_rows = list(zip(*df_clean.columns.tolist()))
        last_row = len(header_rows) - 1
        header_values = []
        for i, row in enumerate(header_rows):
            if i < last_row:
                # Not the last row, aggregate repeated items, e.g. [['aa', 'aa', 'aa'], ['bb', 'bb', 'bb']]
                header_values.append([list(g) for _, g in itertools.groupby(row)])
            else:
                # For the last row keep all elements in single list, e.g. ['a', 'b', 'c', 'a', 'b', 'c']
                header_values.append(list(row))
        return header_values
项目:fireant    作者:kayak    | 项目源码 | 文件源码
def _perform_operation(self, dataframe, key, schema, value_func, operation):
        # Check for references
        references = (dataframe.columns.get_level_values(0).tolist()
                      if isinstance(dataframe.columns, pd.MultiIndex)
                      else [None])

        for reference in references:
            metric_df = value_func(dataframe, schema, reference=reference)

            operation_key = ('{}_{}'.format(metric_df.name, key)
                             if reference is None
                             else (reference, '{}_{}'.format(metric_df.name[1], key)))

            if isinstance(dataframe.index, pd.MultiIndex):
                unstack_levels = list(range(1, len(dataframe.index.levels)))
                dataframe[operation_key] = metric_df.groupby(level=unstack_levels).apply(operation)

            else:
                dataframe[operation_key] = operation(metric_df)
项目:fireant    作者:kayak    | 项目源码 | 文件源码
def _render_data(self, dataframe, display_schema):
        n = len(dataframe.index.levels) if isinstance(dataframe.index, pd.MultiIndex) else 1
        dimensions = list(display_schema['dimensions'].items())
        row_dimensions, column_dimensions = dimensions[:n], dimensions[n:]

        data = []
        for idx, df_row in dataframe.iterrows():
            row = {}

            if not isinstance(idx, tuple):
                idx = (idx,)

            for key, value in self._render_dimension_data(idx, row_dimensions):
                row[key] = value

            for key, value in self._render_metric_data(df_row, column_dimensions,
                                                       display_schema['metrics'], display_schema.get('references')):
                row[key] = value

            data.append(row)

        return data
项目:xarray_filters    作者:ContinuumIO    | 项目源码 | 文件源码
def create_multi_index(arr):
    '''Build a pandas.MultiIndex from the coordinates of DataArray arr

    Parameters
    ----------

    arr: xarray.DataArray

    Returns
    -------

    index: pandas.MultiIndex instance, the cartesian product of the
           coordinate values of every dimension in arr.dims, with the
           dimension names as level names

    '''
    coordinate_values = tuple([getattr(arr, dim).values for dim in arr.dims])
    return pd.MultiIndex.from_product(coordinate_values, names=arr.dims)
项目:perfume    作者:leifwalsh    | 项目源码 | 文件源码
def setUp(self):
        """Create ``self.samples``: 20 rows of begin/end timestamps for 'fn1' and 'fn2'.

        A running clock starts at 1.0; each row records it four times
        (fn1 begin, fn1 end, fn2 begin, fn2 end) and advances it by
        1.1, 0.2, 1.5 and 0.1 after the respective reading.
        """
        steps = (1.1, 0.2, 1.5, 0.1)
        samples = []
        t = 1.0
        for _ in range(20):
            sample = []
            for step in steps:
                sample.append(t)
                t += step
            samples.append(sample)
        self.samples = pd.DataFrame(
            data=samples,
            # Same levels and codes as the former explicit constructor call,
            # but from_product exists in every pandas release whereas the
            # constructor's `labels` keyword does not.
            columns=pd.MultiIndex.from_product(
                [['fn1', 'fn2'], ['begin', 'end']]
            )
        )
项目:crop-seq    作者:epigen    | 项目源码 | 文件源码
def read_seurat_hdf5(hdf5_file):
    """Read the "seurat_matrix" group of an HDF5 file into a data frame.

    The stored matrix is transposed, so the result has the stored rows as
    columns. Those column names are "|"-separated and are turned into a
    MultiIndex with the levels condition, replicate, cell and grna (the
    first four fields) plus gene, which is derived from grna: the second
    "_"-separated piece if there is one, otherwise the first four characters.

    :param hdf5_file: Path of the HDF5 file to read.
    :return: The data frame with MultiIndex columns.
    """
    import h5py
    with h5py.File(hdf5_file, 'r') as handle:
        # dataset[()] reads the whole dataset; it replaces the `.value`
        # attribute, which newer h5py releases no longer provide.
        cols = handle.get("seurat_matrix/columns")[()]
        rows = handle.get("seurat_matrix/rows")[()]
        df = handle.get("seurat_matrix/matrix")[()]
    seurat_matrix = pd.DataFrame(df, index=cols, columns=rows).T

    # add info as multiindex columns
    # Lists instead of map(): on Python 3 a map object is a one-shot
    # iterator, and grna is needed twice (for gene and as a level).
    fields = seurat_matrix.columns.str.split("|")
    condition = [x[0] for x in fields]
    replicate = [x[1] for x in fields]
    cell = [x[2] for x in fields]
    grna = [x[3] for x in fields]
    gene = [x[1] if len(x) > 1 else x[0][:4] for x in pd.Series(grna).str.split("_")]
    seurat_matrix.columns = pd.MultiIndex.from_arrays([condition, replicate, cell, grna, gene], names=['condition', 'replicate', 'cell', 'grna', 'gene'])

    return seurat_matrix
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_observed_otus(self):
        """Rarefaction data for the observed_otus metric at two depths, one iteration."""
        counts = np.array([[150, 100, 100], [50, 100, 100]])
        samples = ['S1', 'S2', 'S3']
        table = biom.Table(counts, ['O1', 'O2'], samples)

        observed = _compute_rarefaction_data(feature_table=table,
                                             min_depth=1,
                                             max_depth=200,
                                             steps=2,
                                             iterations=1,
                                             phylogeny=None,
                                             metrics=['observed_otus'])

        depth_iter = pd.MultiIndex.from_product([[1, 200], [1]],
                                                names=['depth', 'iter'])
        expected = pd.DataFrame(data=[[1, 2], [1, 2], [1, 2]],
                                columns=depth_iter,
                                index=samples)
        pdt.assert_frame_equal(observed['observed_otus'], expected)
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_multiple_metrics(self):
        """Rarefaction data computed for two metrics in one call."""
        counts = np.array([[150, 100, 100], [50, 100, 100]])
        samples = ['S1', 'S2', 'S3']
        table = biom.Table(counts, ['O1', 'O2'], samples)

        observed = _compute_rarefaction_data(feature_table=table,
                                             min_depth=1,
                                             max_depth=200,
                                             steps=2,
                                             iterations=1,
                                             phylogeny=None,
                                             metrics=['observed_otus', 'shannon'])

        depth_iter = pd.MultiIndex.from_product([[1, 200], [1]],
                                                names=['depth', 'iter'])

        def expected_frame(rows):
            # Both metrics share the same axes.
            return pd.DataFrame(data=rows, columns=depth_iter, index=samples)

        pdt.assert_frame_equal(observed['observed_otus'],
                               expected_frame([[1, 2], [1, 2], [1, 2]]))
        pdt.assert_frame_equal(
            observed['shannon'],
            expected_frame([[0., 0.811278124459], [0., 1.], [0., 1.]]))
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_one_iteration_no_metadata(self):
        """Summarise a single iteration per depth when no metadata counts are supplied."""
        depth_iter = pd.MultiIndex.from_product([[1, 200], [1]],
                                                names=['depth', 'iter'])
        samples = ['S1', 'S2', 'S3']
        rarefied = pd.DataFrame(data=[[1, 2] for _ in samples],
                                columns=depth_iter, index=samples)

        # No counts provided because no metadata
        observed = _compute_summary(rarefied, 'sample-id')

        # With one iteration every statistic (min ... max) equals the single
        # observed value: 1. at depth 1 and 2. at depth 200. The count is 1.
        rows = [[sample, depth, 1] + [value] * 9
                for sample in samples
                for depth, value in ((1, 1.), (200, 2.))]
        header = ['sample-id', 'depth', 'count', 'min', '2%', '9%', '25%',
                  '50%', '75%', '91%', '98%', 'max']
        expected = pd.DataFrame(data=rows, columns=header)
        pdt.assert_frame_equal(expected, observed)
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_two_iterations_no_metadata(self):
        """Summarise two iterations per depth when no metadata counts are supplied."""
        depth_iter = pd.MultiIndex.from_product([[1, 200], [1, 2]],
                                                names=['depth', 'iter'])
        samples = ['S1', 'S2', 'S3']
        rarefied = pd.DataFrame(data=[[1, 2, 3, 4] for _ in samples],
                                columns=depth_iter, index=samples)

        # No counts provided because no metadata
        observed = _compute_summary(rarefied, 'sample-id')

        # Every sample holds identical values, so the expected statistics
        # (count first, then min ... max) depend on the depth only.
        stats_by_depth = [
            (1, [1, 1., 1.02, 1.09, 1.25, 1.5, 1.75, 1.91, 1.98, 2.]),
            (200, [1, 3., 3.02, 3.09, 3.25, 3.5, 3.75, 3.91, 3.98, 4.]),
        ]
        rows = [[sample, depth] + stats
                for sample in samples
                for depth, stats in stats_by_depth]
        header = ['sample-id', 'depth', 'count', 'min', '2%', '9%', '25%',
                  '50%', '75%', '91%', '98%', 'max']
        expected = pd.DataFrame(data=rows, columns=header)
        pdt.assert_frame_equal(expected, observed)
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_unique_metadata_groups(self):
        """Reindex by a metadata column in which every sample is unique.

        The first returned frame is expected to hold each sample's values,
        indexed by pet name in sorted order; the second is expected to hold
        1 everywhere.
        """
        level_names = ['depth', 'iter']
        columns = pd.MultiIndex.from_tuples(
            [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', '')],
            names=level_names)
        rows = [[1, 2, 3, 4, 'russ'],
                [5, 6, 7, 8, 'milo'],
                [9, 10, 11, 12, 'peanut']]
        data = pd.DataFrame(data=rows, columns=columns,
                            index=['S1', 'S2', 'S3'])

        obs = _reindex_with_metadata('pet', ['pet'], data)

        # The 'pet' entry stays in the levels but no column refers to it.
        exp_col = pd.MultiIndex(levels=[[1, 200, 'pet'], [1, 2, '']],
                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                names=level_names)
        exp_ind = pd.Index(['milo', 'peanut', 'russ'], name='pet')

        exp_values = pd.DataFrame(
            data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
            columns=exp_col, index=exp_ind)
        pdt.assert_frame_equal(exp_values, obs[0])

        exp_counts = pd.DataFrame(
            data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
            columns=exp_col, index=exp_ind)
        pdt.assert_frame_equal(exp_counts, obs[1])
项目:q2-diversity    作者:qiime2    | 项目源码 | 文件源码
def test_multiple_categories(self):
        """Reindex the same data by each of two metadata columns in turn.

        Grouping by 'pet' leaves one sample per group. Grouping by 'toy'
        puts S1 and S3 together under 'stick', so the second returned frame
        is expected to hold 2 for 'stick' and 1 for 'yeti'.
        """
        level_names = ['depth', 'iter']
        columns = pd.MultiIndex.from_tuples(
            [(1, 1), (1, 2), (200, 1), (200, 2), ('pet', ''), ('toy', '')],
            names=level_names)
        rows = [[1, 2, 3, 4, 'russ', 'stick'],
                [5, 6, 7, 8, 'milo', 'yeti'],
                [9, 10, 11, 12, 'peanut', 'stick']]
        data = pd.DataFrame(data=rows, columns=columns,
                            index=['S1', 'S2', 'S3'])

        # --- group by 'pet' ---
        obs = _reindex_with_metadata('pet', ['pet', 'toy'], data)

        exp_col = pd.MultiIndex(levels=[[1, 200, 'pet', 'toy'], [1, 2, '']],
                                labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                                names=level_names)
        pet_index = pd.Index(['milo', 'peanut', 'russ'], name='pet')

        exp_values = pd.DataFrame(
            data=[[5, 6, 7, 8], [9, 10, 11, 12], [1, 2, 3, 4]],
            columns=exp_col, index=pet_index)
        pdt.assert_frame_equal(exp_values, obs[0])

        exp_counts = pd.DataFrame(
            data=[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]],
            columns=exp_col, index=pet_index)
        pdt.assert_frame_equal(exp_counts, obs[1])

        # --- group by 'toy' ---
        obs = _reindex_with_metadata('toy', ['pet', 'toy'], data)

        toy_index = pd.Index(['stick', 'yeti'], name='toy')

        exp_values = pd.DataFrame(data=[[5, 6, 7, 8], [5, 6, 7, 8]],
                                  columns=exp_col, index=toy_index)
        pdt.assert_frame_equal(exp_values, obs[0])

        exp_counts = pd.DataFrame(data=[[2, 2, 2, 2], [1, 1, 1, 1]],
                                  columns=exp_col, index=toy_index)
        pdt.assert_frame_equal(exp_counts, obs[1])
项目:kaggle-seizure-prediction    作者:sics-lm    | 项目源码 | 文件源码
def normalize_segment_names(dataframe, inplace=False):
    """
    Makes the segment index of the dataframe have names which correspond to the original .mat segment names.
    :param dataframe: The dataframe with segment names. Its index must yield (filename, frame) pairs.
    :param inplace: If True, the segment index will be changed in place in the given data frame.
    :return: A DataFrame where the segment name part of the index has been canonicalized. If inplace is True, the
             original dataframe is returned, otherwise a copy is returned.
    """

    # Iterate the index itself instead of calling Index.get_values(): that
    # method was deprecated and later removed from pandas, while direct
    # iteration yields the same (filename, frame) tuples on every version.
    fixed_values = [(fileutils.get_segment_name(filename), frame)
                    for filename, frame in dataframe.index]
    if not inplace:
        dataframe = dataframe.copy()
    dataframe.index = pd.MultiIndex.from_tuples(fixed_values, names=dataframe.index.names)
    return dataframe
项目:kaggle-seizure-prediction    作者:sics-lm    | 项目源码 | 文件源码
def reshape_frames(dataframe, frame_length=12):
    """
    Returns a new dataframe with the given frame length.
    :param dataframe: A pandas DataFrame with one window per row.
    :param frame_length: The desired number of windows for each feature frame. Must divide the number of windows in
                         *dataframe* evenly.
    :return: A new pandas DataFrame with the desired window frame width. The columns of the new data-frame will be
             multi-index (levels 'window' and 'feature') so that future concatenation of data frames align properly.
    :raises ValueError: If the number of rows in *dataframe* is not divisible by *frame_length*.
    """

    n_windows, window_width = dataframe.shape

    if n_windows % frame_length != 0:
        raise ValueError("The dataframe has {} windows which"
                         " is not divisible by the frame"
                         " length {}".format(n_windows, frame_length))

    # Floor division keeps the frame count an integer on Python 3; true
    # division would hand a float to reshape(), which rejects it.
    n_frames = n_windows // frame_length
    frame_width = window_width * frame_length

    # Each output row is frame_length consecutive input rows laid end to
    # end, so the columns are (window position, original column) pairs.
    column_index = pd.MultiIndex.from_product([range(frame_length),
                                               dataframe.columns],
                                              names=['window', 'feature'])
    # NOTE: the previous implementation called sortlevel(axis=1) here but
    # discarded the result, so the columns were never reordered. That no-op
    # (and since-removed) call is dropped; column order is unchanged.
    return pd.DataFrame(data=dataframe.values.reshape(n_frames, frame_width),
                        columns=column_index)
项目:catalyst    作者:enigmampc    | 项目源码 | 文件源码
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
        """
        Build the index of the next upcoming release for each date and sid.

        Rows are kept only when their event date is on or after the
        simulation date found in their own index; the first remaining row
        of every (simulation date, sid) group is then selected.

        Parameters
        ----------
        stacked_last_per_qtr : pd.DataFrame
            A DataFrame with index of calendar dates, sid, and normalized
            quarters with each row being the latest estimate for the row's
            index values, sorted by event date.

        Returns
        -------
        next_releases_per_date_index : pd.MultiIndex
            An index of calendar dates, sid, and normalized quarters, for only
            the rows that have a next event.
        """
        event_dates = stacked_last_per_qtr[EVENT_DATE_FIELD_NAME]
        simulation_dates = stacked_last_per_qtr.index.get_level_values(
            SIMULATION_DATES
        )
        upcoming = stacked_last_per_qtr.loc[event_dates >= simulation_dates]

        per_date_and_sid = upcoming.groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
        )
        # `stacked_last_per_qtr` is sorted by event date, so the first row
        # of each group is the one taken as the next release.
        next_releases_per_date = per_date_and_sid.nth(0)
        return next_releases_per_date.index
项目:catalyst    作者:enigmampc    | 项目源码 | 文件源码
def get_zeroth_quarter_idx(self, stacked_last_per_qtr):
        """
        Build the index of the most recent release for each date and sid.

        Rows are kept only when their event date is on or before the
        simulation date found in their own index; the last remaining row of
        every (simulation date, sid) group is then selected.

        Parameters
        ----------
        stacked_last_per_qtr : pd.DataFrame
            A DataFrame with index of calendar dates, sid, and normalized
            quarters with each row being the latest estimate for the row's
            index values, sorted by event date.

        Returns
        -------
        previous_releases_per_date_index : pd.MultiIndex
            An index of calendar dates, sid, and normalized quarters, for only
            the rows that have a previous event.
        """
        event_dates = stacked_last_per_qtr[EVENT_DATE_FIELD_NAME]
        simulation_dates = stacked_last_per_qtr.index.get_level_values(
            SIMULATION_DATES
        )
        already_released = stacked_last_per_qtr.loc[
            event_dates <= simulation_dates
        ]

        per_date_and_sid = already_released.groupby(
            level=[SIMULATION_DATES, SID_FIELD_NAME],
            as_index=False,
        )
        # `stacked_last_per_qtr` is sorted by event date, so the last row of
        # each group is the one taken as the previous release.
        previous_releases_per_date = per_date_and_sid.nth(-1)
        return previous_releases_per_date.index
项目:jupyter-handsontables    作者:techmuch    | 项目源码 | 文件源码
def validate(self, obj, value):
        """Validate *value* and optionally sort its MultiIndex columns.

        The value is first validated by the parent class. When this trait
        carries truthy 'lexsort' metadata and the validated frame has
        MultiIndex columns, the columns are sorted on level 0.

        :param obj: The object owning the trait (forwarded to the parent).
        :param value: The candidate value to validate.
        :return: The validated value, with sorted columns when applicable.
        """
        value = super(PandasDataFrame, self).validate(obj, value)
        if (self.get_metadata('lexsort')
                and isinstance(value.columns, pd.MultiIndex)):
            # sort_index(level=...) is the supported spelling of the removed
            # DataFrame.sortlevel() and sorts in the same way.
            value = value.sort_index(axis=1, level=0)
        return value
项目:jupyter-handsontables    作者:techmuch    | 项目源码 | 文件源码
def validate(self, obj, value):
        """Validate *value* and optionally sort its MultiIndex columns.

        The value is first validated by the parent class. When this trait
        carries truthy 'lexsort' metadata and the validated frame has
        MultiIndex columns, the columns are sorted on level 0.

        :param obj: The object owning the trait (forwarded to the parent).
        :param value: The candidate value to validate.
        :return: The validated value, with sorted columns when applicable.
        """
        value = super(PandasDataFrame, self).validate(obj, value)
        if (self.get_metadata('lexsort')
                and isinstance(value.columns, pd.MultiIndex)):
            # sort_index(level=...) is the supported spelling of the removed
            # DataFrame.sortlevel() and sorts in the same way.
            value = value.sort_index(axis=1, level=0)
        return value
项目:meterstick    作者:google    | 项目源码 | 文件源码
def testTwoDimensionalDistribution(self):
    """Distribution of X over (Y, Z) pairs with unit weights.

    X sums to 14 in total; the expected frame holds each (Y, Z) group's
    share of that total: 1/14, 1/14, 1/14 and 11/14.
    """
    data = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                         "Y": [1, 2, 0, 1, 1, 1, 1],
                         "Z": [1, 0, 0, 0, 0, 0, 0]})
    unit_weights = np.array([1, 1, 1, 1, 1, 1, 1])
    distribution = metrics.Distribution("X", ["Y", "Z"])
    output = distribution(data, unit_weights)

    # Rows are (Y=1, Z=1), (Y=2, Z=0), (Y=0, Z=0), (Y=1, Z=0), in that order.
    expected_index = pd.MultiIndex(levels=[[0, 1, 2], [0, 1]],
                                   labels=[[1, 2, 0, 1], [1, 0, 0, 0]],
                                   names=["Y", "Z"])
    expected_shares = np.array([1 / 14., 1 / 14., 1 / 14., 11 / 14.])
    correct = pd.DataFrame(expected_shares,
                           columns=[""],
                           index=expected_index)
    self.assertTrue(output.equals(correct))
项目:meterstick    作者:google    | 项目源码 | 文件源码
def testShuffledDataframeRelativeToJackknife(self):
    """Jackknife comparison results must not depend on input row order.

    The rows are randomly permuted before the analysis runs; both the
    output and the expected frame are then sorted by (Y, Z) so that they
    can be compared regardless of the order the analysis returns.
    """
    def sorted_by_yz(frame):
        # Normalize row order: flatten the index, sort, and restore it.
        flat = frame.reset_index()
        ordered = flat.sort_values(by=["Y", "Z"])
        return ordered.set_index(["Y", "Z"])

    data = pd.DataFrame({"X": range(11),
                         "Y": np.concatenate((np.zeros(6), np.ones(5))),
                         "Z": np.concatenate((np.zeros(3), np.ones(8)))})

    metric = metrics.Distribution("X", ["Z"])
    se_method = standard_errors.Jackknife()
    shuffled = data.iloc[np.random.permutation(11)]
    analysis = core.Analyze(shuffled).relative_to(
        comparisons.AbsoluteDifference("Y", 0))
    analysis = analysis.with_standard_errors(se_method)
    output = sorted_by_yz(analysis.calculate(metric).run())

    expected_index = pd.MultiIndex(levels=[[1.], [0., 1.]],
                                   labels=[[0, 0], [0, 1]],
                                   names=["Y", "Z"])
    expected_values = np.array([[-0.2, 0.18100283490],
                                [0.2, 0.18100283490]])
    correct = sorted_by_yz(pd.DataFrame(
        expected_values,
        columns=["X Distribution Absolute Difference",
                 "X Distribution Absolute Difference Jackknife SE"],
        index=expected_index))

    # Same labels on both axes, and values equal to within 1e-10.
    self.assertTrue(all(output.index == correct.index) and
                    all(output.columns == correct.columns) and
                    np.all(abs(output.values - correct.values) < 1e-10))
项目:weightedcalcs    作者:jsvine    | 项目源码 | 文件源码
def groupby_deco(func):
    """Let a method that takes a DataFrame also accept a DataFrameGroupBy.

    When the decorated method's first positional argument is a
    ``DataFrameGroupBy``, *func* is applied to every group and the
    per-group results are returned; a Series result carrying a MultiIndex
    is unstacked first. Any other argument is passed straight to *func*.

    :param func: A method with signature ``func(self, thing, *args, **kwargs)``.
    :return: The wrapping method, carrying *func*'s name and docstring.
    """
    from functools import wraps

    @wraps(func)  # keep the wrapped method's __name__ / __doc__
    def func_wrapper(self, thing, *args, **kwargs):
        if not isinstance(thing, pd.core.groupby.DataFrameGroupBy):
            return func(self, thing, *args, **kwargs)

        agg = thing.apply(lambda x: func(self, x, *args, **kwargs))
        is_series = isinstance(agg, pd.core.series.Series)
        has_multiindex = isinstance(agg.index, pd.MultiIndex)
        if is_series and has_multiindex:
            return agg.unstack()
        return agg
    return func_wrapper
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def _isnull_new(obj):
    """Dispatch a null check according to the type of *obj*.

    Scalars go to ``lib.checknull``; Series, ndarrays and Index objects, as
    well as lists and anything exposing ``__array__``, go to
    ``_isnull_ndarraylike``; ``ABCGeneric`` objects are rebuilt from their
    null-checked internal data. Anything else is null only if it is None.

    Raises NotImplementedError for a MultiIndex.
    """
    if lib.isscalar(obj):
        return lib.checknull(obj)

    # hack (for now) because MI registers as ndarray
    if isinstance(obj, pd.MultiIndex):
        raise NotImplementedError("isnull is not defined for MultiIndex")

    if isinstance(obj, (ABCSeries, np.ndarray, pd.Index)):
        return _isnull_ndarraylike(obj)

    if isinstance(obj, ABCGeneric):
        return obj._constructor(obj._data.isnull(func=isnull))

    if isinstance(obj, list) or hasattr(obj, '__array__'):
        return _isnull_ndarraylike(np.asarray(obj))

    return obj is None
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_get_level_values_box(self):
        """Values read from a datetime level come back as Timestamp."""
        from pandas import MultiIndex

        # Four dates, each paired with both values of the second level.
        index = MultiIndex(
            levels=[date_range('1/1/2000', periods=4), [0, 1]],
            labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]])

        first_value = index.get_level_values(0)[0]
        self.assertTrue(isinstance(first_value, Timestamp))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def setUp(self):
        """Build one sample index of each kind, keyed by a descriptive name."""
        self.indices = {
            'unicodeIndex': tm.makeUnicodeIndex(100),
            'strIndex': tm.makeStringIndex(100),
            'dateIndex': tm.makeDateIndex(100),
            'periodIndex': tm.makePeriodIndex(100),
            'tdIndex': tm.makeTimedeltaIndex(100),
            'intIndex': tm.makeIntIndex(100),
            'rangeIndex': tm.makeIntIndex(100),
            'floatIndex': tm.makeFloatIndex(100),
            'boolIndex': Index([True, False]),
            'catIndex': tm.makeCategoricalIndex(100),
            'empty': Index([]),
            'tuples': MultiIndex.from_tuples(
                lzip(['foo', 'bar', 'baz'], [1, 2, 3])),
        }
        self.setup_indices()
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_construction_list_mixed_tuples(self):
        """A list mixing tuples and scalars builds a plain Index (10697).

        The result must be an Index but not a MultiIndex, regardless of
        whether the tuple comes first or last in the list.
        """
        # 10697
        # if we are constructing from a mixed list of tuples, make sure that we
        # are independent of the sorting order
        for values in ([('A', 1), 'B'], ['B', ('A', 1)]):
            idx = Index(values)
            # Two separate assertions: chaining them with `and` never ran
            # the second one, because assertIsInstance returns None.
            self.assertIsInstance(idx, Index)
            self.assertNotIsInstance(idx, MultiIndex)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_str_attribute(self):
        """Exercise the ``.str`` accessor on Index objects (GH9068).

        Covers the strip family, indexes for which ``.str`` must raise,
        ``split`` with and without ``expand``, and boolean-returning
        methods used as a mask.
        """
        # GH9068
        idx = Index([' jack', 'jill ', ' jesse ', 'frank'])
        for name in ['strip', 'rstrip', 'lstrip']:
            str_method = getattr(str, name)
            expected = Index([str_method(x) for x in idx.values])
            accessor_method = getattr(Index.str, name)
            tm.assert_index_equal(accessor_method(idx.str), expected)

        # create a few instances that are not able to use .str accessor
        unsupported = [Index(range(5)), tm.makeDateIndex(10),
                       MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]),
                       PeriodIndex(start='2000', end='2010', freq='A')]
        for bad_idx in unsupported:
            with self.assertRaisesRegexp(AttributeError,
                                         'only use .str accessor'):
                bad_idx.str.repeat(2)

        # split: lists by default, a MultiIndex when expanded
        idx = Index(['a b c', 'd e', 'f'])
        split_lists = Index([['a', 'b', 'c'], ['d', 'e'], ['f']])
        tm.assert_index_equal(idx.str.split(), split_lists)
        tm.assert_index_equal(idx.str.split(expand=False), split_lists)

        split_expanded = MultiIndex.from_tuples(
            [('a', 'b', 'c'), ('d', 'e', np.nan), ('f', np.nan, np.nan)])
        tm.assert_index_equal(idx.str.split(expand=True), split_expanded)

        # test boolean case, should return np.array instead of boolean Index
        idx = Index(['a1', 'a2', 'b1', 'b2'])
        expected_mask = np.array([True, True, False, False])
        tm.assert_numpy_array_equal(idx.str.startswith('a'), expected_mask)
        self.assertIsInstance(idx.str.startswith('a'), np.ndarray)

        s = Series(range(4), index=idx)
        expected_subset = Series(range(2), index=['a1', 'a2'])
        tm.assert_series_equal(s[s.index.str.startswith('a')],
                               expected_subset)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self):
        """Reindexing onto an empty typed index takes the target's dtype.

        GH7774: the source index holds strings, yet the reindexed result is
        expected to report the dtype of the empty target, including per
        level when the target is a MultiIndex.
        """
        # GH7774
        idx = pd.Index(list('abc'))

        typed_targets = [(pd.Int64Index, np.int64),
                         (pd.Float64Index, np.float64),
                         (pd.DatetimeIndex, np.datetime64)]
        for index_cls, expected_type in typed_targets:
            reindexed = idx.reindex(index_cls([]))[0]
            self.assertEqual(reindexed.dtype.type, expected_type)

        empty_multi = pd.MultiIndex(
            [pd.Int64Index([]), pd.Float64Index([])], [[], []])
        reindexed = idx.reindex(empty_multi)[0]
        first_level, second_level = reindexed.levels[0], reindexed.levels[1]
        self.assertEqual(first_level.dtype.type, np.int64)
        self.assertEqual(second_level.dtype.type, np.float64)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_pivot_index_none(self):
        """Pivot a frame without passing ``index`` (gh-3962).

        The frame's existing index (named 'index') supplies the row labels.
        The pivot is checked both with an explicit ``values`` column and
        with ``values`` omitted.
        """
        # gh-3962
        data = {
            'index': ['A', 'B', 'C', 'C', 'B', 'A'],
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        frame = DataFrame(data).set_index('index')
        result = frame.pivot(columns='columns', values='values')
        expected = DataFrame({
            'One': {'A': 1., 'B': 2., 'C': 3.},
            'Two': {'A': 1., 'B': 2., 'C': 3.}
        })

        expected.index.name, expected.columns.name = 'index', 'columns'
        assert_frame_equal(result, expected)

        # omit values
        result = frame.pivot(columns='columns')

        # Without `values` the result columns gain an outer 'values' level.
        expected.columns = pd.MultiIndex.from_tuples([('values', 'One'),
                                                      ('values', 'Two')],
                                                     names=[None, 'columns'])
        expected.index.name = 'index'
        # Names are skipped in the frame comparison and checked explicitly
        # on the two lines that follow.
        assert_frame_equal(result, expected, check_names=False)
        self.assertEqual(result.index.name, 'index',)
        self.assertEqual(result.columns.names, (None, 'columns'))
        # Drop the outer level again so `expected` is back to flat columns.
        expected.columns = expected.columns.droplevel(0)

        # NOTE(review): this second `data` dict is not used by any visible
        # statement below -- `frame` is still the one built from the first
        # dict -- and its 'index' entry has 7 items against 6 in the other
        # columns. Looks like an unfinished scenario; confirm intent.
        data = {
            'index': range(7),
            'columns': ['One', 'One', 'One', 'Two', 'Two', 'Two'],
            'values': [1., 2., 3., 3., 2., 1.]
        }

        result = frame.pivot(columns='columns', values='values')

        expected.columns.name = 'columns'
        assert_frame_equal(result, expected)