Python pandas 模块,option_context() 实例源码

我们从Python开源项目中,提取了以下42个代码示例,用于说明如何使用pandas.option_context()。

项目:pyprocessmacro    作者:QuentinAndre    | 项目源码 | 文件源码
def summary(self):
        """Write a formatted summary of the fitted Process model to stdout.

        Rendering honors two entries of ``self.options``: ``precision``
        (pandas display precision for all model tables) and ``detail``
        (whether the per-outcome model section is printed).

        :return: None
        """
        with pd.option_context("precision", self.options["precision"]):
            # Mediator models are looked up with .get(): a missing name
            # simply yields None rather than raising.
            mediator_models = [self.outcome_models.get(name) for name in self.mediators]
            iv_model = self.outcome_models[self.iv]
            if self.options["detail"]:
                print("\n***************************** OUTCOME MODELS ****************************\n")
                print(iv_model)
                print("\n-------------------------------------------------------------------------\n")
                for mediator_model in mediator_models:
                    print(mediator_model)
                    print("\n-------------------------------------------------------------------------\n")
            if self.indirect_model:
                print("\n********************** DIRECT AND INDIRECT EFFECTS **********************\n")
                print(self.direct_model)
                print(self.indirect_model)
            else:
                print("\n********************** CONDITIONAL EFFECTS **********************\n")
                print(self.direct_model)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_show_null_counts(self):
        """Null counts in DataFrame.info() must follow the ``null_counts``
        flag and, when the flag is None, the max_info_rows/columns limits.

        NOTE(review): ``null_counts`` was renamed ``show_counts`` in pandas
        1.5 and removed in 2.0 -- this test targets the older API.
        """

        # One NaN so the per-column 'non-null' summary is meaningful.
        df = DataFrame(1, columns=range(10), index=range(10))
        df.iloc[1, 1] = np.nan

        def check(null_counts, result):
            # Capture info() output and assert presence/absence of the
            # 'non-null' per-column counts.
            buf = StringIO()
            df.info(buf=buf, null_counts=null_counts)
            self.assertTrue(('non-null' in buf.getvalue()) is result)

        # Frame within both limits: None defaults to showing counts.
        with option_context('display.max_info_rows', 20,
                            'display.max_info_columns', 20):
            check(None, True)
            check(True, True)
            check(False, False)

        # Frame exceeds the limits: None defaults to hiding counts.
        with option_context('display.max_info_rows', 5,
                            'display.max_info_columns', 5):
            check(None, False)
            check(True, False)
            check(False, False)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_repr_truncation(self):
        """Cell text longer than display.max_colwidth must be truncated with
        an ellipsis in the frame repr; shorter text must not be."""
        max_len = 20
        with option_context("display.max_colwidth", max_len):
            # Random strings straddling the limit (length max_len-1..max_len).
            df = DataFrame({'A': np.random.randn(10),
                            'B': [tm.rands(np.random.randint(
                                max_len - 1, max_len + 1)) for i in range(10)
            ]})
            r = repr(df)
            r = r[r.find('\n') + 1:]  # drop the column-header row

            # Width adjustment object (handles East Asian wide characters).
            adj = fmt._get_adjustment()

            for line, value in lzip(r.split('\n'), df['B']):
                # +1 accounts for the column separator space.
                if adj.len(value) + 1 > max_len:
                    self.assertIn('...', line)
                else:
                    self.assertNotIn('...', line)

        # A huge or just-large-enough limit must disable truncation.
        with option_context("display.max_colwidth", 999999):
            self.assertNotIn('...', repr(df))

        with option_context("display.max_colwidth", max_len + 2):
            self.assertNotIn('...', repr(df))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_expand_frame_repr(self):
        """display.expand_frame_repr toggles wrapping of wide frames; tall
        frames are vertically truncated either way."""
        small = DataFrame('hello', [0], [0])
        wide = DataFrame('hello', [0], lrange(10))
        tall = DataFrame('hello', lrange(30), lrange(5))

        with option_context('mode.sim_interactive', True,
                            'display.max_columns', 10,
                            'display.width', 20,
                            'display.max_rows', 20,
                            'display.show_dimensions', True):
            with option_context('display.expand_frame_repr', True):
                # Wrapping on: wide frames expand, tall frames truncate.
                self.assertFalse(has_truncated_repr(small))
                self.assertFalse(has_expanded_repr(small))
                self.assertFalse(has_truncated_repr(wide))
                self.assertTrue(has_expanded_repr(wide))
                self.assertTrue(has_vertically_truncated_repr(tall))
                self.assertTrue(has_expanded_repr(tall))

            with option_context('display.expand_frame_repr', False):
                # Wrapping off: nothing expands, and the wide frame is not
                # horizontally truncated either.
                self.assertFalse(has_truncated_repr(small))
                self.assertFalse(has_expanded_repr(small))
                self.assertFalse(has_horizontally_truncated_repr(wide))
                self.assertFalse(has_expanded_repr(wide))
                self.assertTrue(has_vertically_truncated_repr(tall))
                self.assertFalse(has_expanded_repr(tall))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_str_max_colwidth(self):
        # GH 7856
        df = pd.DataFrame([{'a': 'foo',
                            'b': 'bar',
                            'c': 'uncomfortably long line with lots of stuff',
                            'd': 1}, {'a': 'foo',
                                      'b': 'bar',
                                      'c': 'stuff',
                                      'd': 1}])
        df.set_index(['a', 'b', 'c'])
        self.assertTrue(
            str(df) ==
            '     a    b                                           c  d\n'
            '0  foo  bar  uncomfortably long line with lots of stuff  1\n'
            '1  foo  bar                                       stuff  1')
        with option_context('max_colwidth', 20):
            self.assertTrue(str(df) == '     a    b                    c  d\n'
                            '0  foo  bar  uncomfortably lo...  1\n'
                            '1  foo  bar                stuff  1')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_wide_repr(self):
        """A frame one column under display.max_columns must switch from a
        flat repr to a (longer) wrapped repr when expand_frame_repr is turned
        on, and shrink again when the display width is raised.

        NOTE(review): set_option/reset_option mutate global state; statement
        order here is significant.
        """
        with option_context('mode.sim_interactive', True,
                            'display.show_dimensions', True):
            max_cols = get_option('display.max_columns')
            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
            set_option('display.expand_frame_repr', False)
            rep_str = repr(df)

            # Dimensions footer appears because show_dimensions is on.
            assert "10 rows x %d columns" % (max_cols - 1) in rep_str
            set_option('display.expand_frame_repr', True)
            wide_repr = repr(df)
            self.assertNotEqual(rep_str, wide_repr)

            # A wider display needs fewer wrapped chunks, hence less text.
            with option_context('display.width', 120):
                wider_repr = repr(df)
                self.assertTrue(len(wider_repr) < len(wide_repr))

        reset_option('display.expand_frame_repr')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_wide_repr_named(self):
        """Like test_wide_repr, but with a named index: each wrapped chunk of
        the wide repr must repeat the index name.

        NOTE(review): set_option/reset_option mutate global state; statement
        order here is significant.
        """
        with option_context('mode.sim_interactive', True):
            max_cols = get_option('display.max_columns')
            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
            df.index.name = 'DataFrame Index'
            set_option('display.expand_frame_repr', False)

            rep_str = repr(df)
            set_option('display.expand_frame_repr', True)
            wide_repr = repr(df)
            self.assertNotEqual(rep_str, wide_repr)

            # Wider display -> fewer wrapped chunks -> shorter repr.
            with option_context('display.width', 150):
                wider_repr = repr(df)
                self.assertTrue(len(wider_repr) < len(wide_repr))

            # Every 13th line (each wrapped chunk's header area) must carry
            # the index name.
            for line in wide_repr.splitlines()[1::13]:
                self.assertIn('DataFrame Index', line)

        reset_option('display.expand_frame_repr')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_wide_repr_multiindex_cols(self):
        """Wide repr must also wrap correctly when both axes carry a
        MultiIndex; a wider display then shortens the repr.

        NOTE(review): set_option/reset_option mutate global state; statement
        order here is significant.
        """
        with option_context('mode.sim_interactive', True):
            max_cols = get_option('display.max_columns')
            midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10)))
            mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols
                                                                   - 1)))
            df = DataFrame(tm.rands_array(25, (10, max_cols - 1)),
                           index=midx, columns=mcols)
            df.index.names = ['Level 0', 'Level 1']
            set_option('display.expand_frame_repr', False)
            rep_str = repr(df)
            set_option('display.expand_frame_repr', True)
            wide_repr = repr(df)
            self.assertNotEqual(rep_str, wide_repr)

        # sim_interactive is off again here; width alone drives wrapping.
        with option_context('display.width', 150):
            wider_repr = repr(df)
            self.assertTrue(len(wider_repr) < len(wide_repr))

        reset_option('display.expand_frame_repr')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_show_dimensions(self):
        """The '5 rows' dimensions footer must appear exactly when
        display.show_dimensions is True, or is 'truncate' and the frame is
        actually truncated."""
        df = DataFrame(123, lrange(10, 15), lrange(30))

        # (max_rows, max_columns, show_dimensions, footer expected?)
        # NOTE(review): 'info' is a truthy non-bool handed to
        # expand_frame_repr; preserved verbatim from the original.
        cases = [
            (10, 40, True, True),         # always shown
            (10, 40, False, False),       # always hidden
            (2, 2, 'truncate', True),     # truncated -> footer shown
            (10, 40, 'truncate', False),  # fits -> footer hidden
        ]
        for max_rows, max_columns, show_dimensions, footer in cases:
            with option_context('display.max_rows', max_rows,
                                'display.max_columns', max_columns,
                                'display.width', 500,
                                'display.expand_frame_repr', 'info',
                                'display.show_dimensions', show_dimensions):
                if footer:
                    self.assertTrue('5 rows' in str(df))
                    self.assertTrue('5 rows' in df._repr_html_())
                else:
                    self.assertFalse('5 rows' in str(df))
                    self.assertFalse('5 rows' in df._repr_html_())
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_info_repr(self):
        """Frames that truncate (too long or too wide) must switch to the
        info view when display.large_repr is 'info'."""
        max_rows = get_option('display.max_rows')
        max_cols = get_option('display.max_columns')

        # One row over the limit, then one column over the limit.
        cases = [
            (max_rows + 1, max_cols - 1, has_vertically_truncated_repr),
            (max_rows - 1, max_cols + 1, has_horizontally_truncated_repr),
        ]
        for h, w, is_truncated in cases:
            df = DataFrame(dict((k, np.arange(1, 1 + h))
                                for k in np.arange(w)))
            assert is_truncated(df)
            with option_context('display.large_repr', 'info'):
                assert has_info_repr(df)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_info_repr_html(self):
        """The HTML repr must embed the (HTML-escaped) info view only when
        display.large_repr is 'info'."""
        max_rows = get_option('display.max_rows')
        max_cols = get_option('display.max_columns')

        # Long frame: one row over the limit.
        df = DataFrame(dict((k, np.arange(1, 2 + max_rows))
                            for k in np.arange(max_cols - 1)))
        assert r'&lt;class' not in df._repr_html_()
        with option_context('display.large_repr', 'info'):
            assert r'&lt;class' in df._repr_html_()

        # Wide frame: one column over the limit.
        df = DataFrame(dict((k, np.arange(1, max_rows))
                            for k in np.arange(max_cols + 1)))
        assert '<class' not in df._repr_html_()
        with option_context('display.large_repr', 'info'):
            assert '&lt;class' in df._repr_html_()
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_format_explicit(self):
        """Truncated Series repr must keep the expected layout for the
        one-char, two-char, ascending and descending string fixtures."""
        test_sers = self.gen_test_series()
        expected = {
            'onel': '0     a\n1     a\n     ..\n98    a\n99    a\ndtype: object',
            'twol': ('0     ab\n1     ab\n      ..\n98    ab\n99    ab\ndtype:'
                     ' object'),
            'asc': ('0         a\n1        ab\n      ...  \n4     abcde\n5'
                    '    abcdef\ndtype: object'),
            'desc': ('5    abcdef\n4     abcde\n      ...  \n1        ab\n0'
                     '         a\ndtype: object'),
        }
        with option_context("display.max_rows", 4):
            # Same order as the original assertions.
            for key in ['onel', 'twol', 'asc', 'desc']:
                self.assertEqual(expected[key], repr(test_sers[key]))
项目:q2templates    作者:qiime2    | 项目源码 | 文件源码
def df_to_html(df, border=0, classes=('table', 'table-striped', 'table-hover'),
               **kwargs):
    """Convert a dataframe to HTML without truncating contents.

    pandas will truncate cell contents that exceed 50 characters by default.
    Use this function to avoid this truncation behavior.

    This function uses different default parameters than `DataFrame.to_html` to
    give uniform styling to HTML tables that are compatible with q2template
    themes. These parameters can be overridden, and they (along with any other
    parameters) will be passed through to `DataFrame.to_html`.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to convert to HTML.
    kwargs : dict
        Parameters passed through to `pd.DataFrame.to_html`.

    Returns
    -------
    str
        DataFrame converted to HTML.

    References
    ----------
    .. [1] https://stackoverflow.com/q/26277757/3776794
    .. [2] https://github.com/pandas-dev/pandas/issues/1852

    """
    # FIX: None disables column-width truncation.  The magic value -1 used
    # previously was deprecated in pandas 1.0 and rejected by newer versions.
    with pd.option_context('display.max_colwidth', None):
        return df.to_html(border=border, classes=classes, **kwargs)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_representation(self):
        """repr/str/unicode of DatetimeIndex must match the expected text for
        empty, plain, tz-aware and NaT-containing indexes.

        NOTE(review): ``__unicode__`` is only defined on Python-2-era pandas
        objects; this test targets that older environment.
        """
        idx = []
        idx.append(DatetimeIndex([], freq='D'))
        idx.append(DatetimeIndex(['2011-01-01'], freq='D'))
        idx.append(DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D'))
        idx.append(DatetimeIndex(
            ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D'))
        idx.append(DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
             ], freq='H', tz='Asia/Tokyo'))
        idx.append(DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='US/Eastern'))
        idx.append(DatetimeIndex(
            ['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT], tz='UTC'))

        exp = []
        exp.append("""DatetimeIndex([], dtype='datetime64[ns]', freq='D')""")
        exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', "
                   "freq='D')")
        exp.append("DatetimeIndex(['2011-01-01', '2011-01-02'], "
                   "dtype='datetime64[ns]', freq='D')")
        exp.append("DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
                   "dtype='datetime64[ns]', freq='D')")
        exp.append("DatetimeIndex(['2011-01-01 09:00:00+09:00', "
                   "'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
                   ", dtype='datetime64[ns, Asia/Tokyo]', freq='H')")
        exp.append("DatetimeIndex(['2011-01-01 09:00:00-05:00', "
                   "'2011-01-01 10:00:00-05:00', 'NaT'], "
                   "dtype='datetime64[ns, US/Eastern]', freq=None)")
        # FIX: dropped a stray trailing '""' (an empty-string literal left
        # over from an unbalanced triple quote); it concatenated to nothing
        # but was dead weight.
        exp.append("DatetimeIndex(['2011-01-01 09:00:00+00:00', "
                   "'2011-01-01 10:00:00+00:00', 'NaT'], "
                   "dtype='datetime64[ns, UTC]', freq=None)")

        # Wide display so no repr wraps onto multiple lines.
        with pd.option_context('display.width', 300):
            for indx, expected in zip(idx, exp):
                for func in ['__repr__', '__unicode__', '__str__']:
                    result = getattr(indx, func)()
                    self.assertEqual(result, expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_representation(self):
        """repr/str/unicode of TimedeltaIndex must match the expected text
        from empty up to sub-day resolution (which drops the freq).

        NOTE(review): ``__unicode__`` is only defined on Python-2-era pandas
        objects; this test targets that older environment.
        """
        cases = [
            (TimedeltaIndex([], freq='D'),
             "TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')"),
            (TimedeltaIndex(['1 days'], freq='D'),
             "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', "
             "freq='D')"),
            (TimedeltaIndex(['1 days', '2 days'], freq='D'),
             "TimedeltaIndex(['1 days', '2 days'], "
             "dtype='timedelta64[ns]', freq='D')"),
            (TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D'),
             "TimedeltaIndex(['1 days', '2 days', '3 days'], "
             "dtype='timedelta64[ns]', freq='D')"),
            (TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days']),
             "TimedeltaIndex(['1 days 00:00:01', '2 days 00:00:00', "
             "'3 days 00:00:00'], dtype='timedelta64[ns]', freq=None)"),
        ]

        with pd.option_context('display.width', 300):
            for index, expected in cases:
                for accessor in ['__repr__', '__unicode__', '__str__']:
                    self.assertEqual(getattr(index, accessor)(), expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_representation_to_series(self):
        idx1 = TimedeltaIndex([], freq='D')
        idx2 = TimedeltaIndex(['1 days'], freq='D')
        idx3 = TimedeltaIndex(['1 days', '2 days'], freq='D')
        idx4 = TimedeltaIndex(['1 days', '2 days', '3 days'], freq='D')
        idx5 = TimedeltaIndex(['1 days 00:00:01', '2 days', '3 days'])

        exp1 = """Series([], dtype: timedelta64[ns])"""

        exp2 = """0   1 days
dtype: timedelta64[ns]"""

        exp3 = """0   1 days
1   2 days
dtype: timedelta64[ns]"""

        exp4 = """0   1 days
1   2 days
2   3 days
dtype: timedelta64[ns]"""

        exp5 = """0   1 days 00:00:01
1   2 days 00:00:00
2   3 days 00:00:00
dtype: timedelta64[ns]"""

        with pd.option_context('display.width', 300):
            for idx, expected in zip([idx1, idx2, idx3, idx4, idx5],
                                     [exp1, exp2, exp3, exp4, exp5]):
                result = repr(pd.Series(idx))
                self.assertEqual(result, expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_repr_max_seq_item_setting(self):
        """With display.max_seq_items set to None, even a long index repr
        must never be elided with '...'."""
        # GH10182
        repeated = self.create_index().repeat(50)
        with pd.option_context("display.max_seq_items", None):
            repr(repeated)
            self.assertFalse('...' in str(repeated))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_isnull_for_inf(self):
        """With mode.use_inf_as_null enabled, +/-inf must count as null for
        isnull() and be dropped by dropna().

        NOTE(review): this option was later renamed ``use_inf_as_na`` and has
        been removed from modern pandas; the test targets the old API.
        """
        s = Series(['a', np.inf, np.nan, 1.0])
        with pd.option_context('mode.use_inf_as_null', True):
            # Evaluate both results while the option is still active.
            r = s.isnull()
            dr = s.dropna()
        e = Series([False, True, True, False])
        de = Series(['a', 1.0], index=[0, 3])
        tm.assert_series_equal(r, e)
        tm.assert_series_equal(dr, de)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_repr_chop_threshold(self):
        df = DataFrame([[0.1, 0.5], [0.5, -0.1]])
        pd.reset_option("display.chop_threshold")  # default None
        self.assertEqual(repr(df), '     0    1\n0  0.1  0.5\n1  0.5 -0.1')

        with option_context("display.chop_threshold", 0.2):
            self.assertEqual(repr(df), '     0    1\n0  0.0  0.5\n1  0.5  0.0')

        with option_context("display.chop_threshold", 0.6):
            self.assertEqual(repr(df), '     0    1\n0  0.0  0.0\n1  0.0  0.0')

        with option_context("display.chop_threshold", None):
            self.assertEqual(repr(df), '     0    1\n0  0.1  0.5\n1  0.5 -0.1')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_repr_obeys_max_seq_limit(self):
        """pprint_thing output length must scale with display.max_seq_items:
        a large limit yields long output, a small limit short output."""
        for limit, check in [(2000, lambda text: len(text) > 1000),
                             (5, lambda text: len(text) < 100)]:
            with option_context("display.max_seq_items", limit):
                self.assertTrue(check(com.pprint_thing(lrange(1000))))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_repr_non_interactive(self):
        """In non-interactive mode the repr must not depend on terminal
        auto-size detection: a tall frame is neither truncated nor expanded.
        """
        frame = DataFrame('hello', lrange(1000), lrange(5))

        # Zero width/height would trigger auto-detection when interactive.
        with option_context('mode.sim_interactive', False,
                            'display.width', 0,
                            'display.height', 0,
                            'display.max_rows', 5000):
            self.assertFalse(has_truncated_repr(frame))
            self.assertFalse(has_expanded_repr(frame))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_auto_detect(self):
        """Interplay of max_rows/max_columns None (never truncate) vs 0
        (auto-detect from terminal size) for frames sized just past the
        detected terminal dimensions."""
        term_width, term_height = get_terminal_size()
        fac = 1.05  # Arbitrary factor slightly exceeding the terminal width
        cols = range(int(term_width * fac))
        index = range(10)
        df = DataFrame(index=index, columns=cols)
        with option_context('mode.sim_interactive', True):
            with option_context('max_rows', None):
                with option_context('max_columns', None):
                    # Wrap around with None
                    self.assertTrue(has_expanded_repr(df))
            with option_context('max_rows', 0):
                with option_context('max_columns', 0):
                    # Truncate with auto detection.
                    self.assertTrue(has_horizontally_truncated_repr(df))

            # Now also taller than the detected terminal height.
            index = range(int(term_height * fac))
            df = DataFrame(index=index, columns=cols)
            with option_context('max_rows', 0):
                with option_context('max_columns', None):
                    # Wrap around with None
                    self.assertTrue(has_expanded_repr(df))
                    # Truncate vertically
                    self.assertTrue(has_vertically_truncated_repr(df))

            with option_context('max_rows', None):
                with option_context('max_columns', 0):
                    self.assertTrue(has_horizontally_truncated_repr(df))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_to_string_truncate_indices(self):
        """Vertical/horizontal/doubly-truncated flags must track max_rows and
        max_columns for every index type and frame size (10 vs 20)."""
        index_makers = [tm.makeStringIndex, tm.makeUnicodeIndex,
                        tm.makeIntIndex, tm.makeDateIndex, tm.makePeriodIndex]
        for make_index in index_makers:
            for make_column in [tm.makeStringIndex]:
                for h in [10, 20]:
                    for w in [10, 20]:
                        with option_context("display.expand_frame_repr",
                                            False):
                            df = DataFrame(index=make_index(h),
                                           columns=make_column(w))
                            # Only the 20-row frame exceeds max_rows=15.
                            with option_context("display.max_rows", 15):
                                check = (self.assertTrue if h == 20
                                         else self.assertFalse)
                                check(has_vertically_truncated_repr(df))
                            # Only the 20-column frame exceeds max_columns=15.
                            with option_context("display.max_columns", 15):
                                check = (self.assertTrue if w == 20
                                         else self.assertFalse)
                                check(has_horizontally_truncated_repr(df))
                            # Doubly truncated only when both axes exceed.
                            with option_context("display.max_rows", 15,
                                                "display.max_columns", 15):
                                check = (self.assertTrue
                                         if h == 20 and w == 20
                                         else self.assertFalse)
                                check(has_doubly_truncated_repr(df))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_to_string_truncate_multilevel(self):
        """A frame with MultiIndex rows *and* columns must truncate on both
        axes once both max_rows and max_columns are exceeded."""
        levels = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        frame = DataFrame(index=levels, columns=levels)
        with option_context("display.max_rows", 7, "display.max_columns", 7):
            self.assertTrue(has_doubly_truncated_repr(frame))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_wide_repr_wide_columns(self):
        """Three 90-character column labels must force the interactive repr
        to wrap onto exactly 20 lines."""
        with option_context('mode.sim_interactive', True):
            labels = ['a' * 90, 'b' * 90, 'c' * 90]
            frame = DataFrame(randn(5, 3), columns=labels)
            self.assertEqual(len(repr(frame).splitlines()), 20)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_wide_repr_unicode(self):
        """Wide-repr behavior must hold for unicode string frames: wrapping
        lengthens the repr, a wider display shortens it again.

        NOTE(review): set_option/reset_option mutate global state; statement
        order here is significant.
        """
        with option_context('mode.sim_interactive', True):
            max_cols = get_option('display.max_columns')
            df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1)))
            set_option('display.expand_frame_repr', False)
            rep_str = repr(df)
            set_option('display.expand_frame_repr', True)
            wide_repr = repr(df)
            self.assertNotEqual(rep_str, wide_repr)

            # Wider display -> fewer wrapped chunks -> shorter repr.
            with option_context('display.width', 150):
                wider_repr = repr(df)
                self.assertTrue(len(wider_repr) < len(wide_repr))

        reset_option('display.expand_frame_repr')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_wide_repr_wide_long_columns(self):
        with option_context('mode.sim_interactive', True):
            df = DataFrame({'a': ['a' * 30, 'b' * 30],
                            'b': ['c' * 70, 'd' * 80]})

            result = repr(df)
            self.assertTrue('ccccc' in result)
            self.assertTrue('ddddd' in result)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_max_multi_index_display(self):
        """Line counts of a truncated Series repr as display.max_rows varies,
        for both a MultiIndex series and a default-index series."""
        # GH 7101

        # doc example (indexing.rst): an 8-row two-level MultiIndex series.
        levels = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        index = MultiIndex.from_tuples(list(zip(*levels)),
                                       names=['first', 'second'])
        s = Series(randn(8), index=index)

        # (max_rows, expected line count); the MultiIndex adds a header line.
        for max_rows, expected in [(10, 10), (3, 5), (2, 5), (1, 4), (0, 10)]:
            with option_context("display.max_rows", max_rows):
                self.assertEqual(len(str(s).split('\n')), expected)

        # Same data with the default index: one line fewer throughout.
        s = Series(randn(8), None)

        for max_rows, expected in [(10, 9), (3, 4), (2, 4), (1, 3), (0, 9)]:
            with option_context("display.max_rows", max_rows):
                self.assertEqual(len(str(s).split('\n')), expected)

    # Make sure #8532 is fixed
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_consistent_format(self):
        """Every value shown in a truncated float Series repr must use one
        consistent format (the 0.9999 entry forces four decimal places)."""
        values = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.9999, 1, 1] * 10
        with option_context("display.max_rows", 10):
            res = repr(pd.Series(values))
        exp = ('0      1.0000\n1      1.0000\n2      1.0000\n3      '
               '1.0000\n4      1.0000\n        ...  \n125    '
               '1.0000\n126    1.0000\n127    0.9999\n128    '
               '1.0000\n129    1.0000\ndtype: float64')
        self.assertEqual(res, exp)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def chck_ncols(self, s):
        """Assert that every data row of the truncated repr of Series ``s``
        has the same stripped width (i.e. the columns line up).

        Rows consisting of dots (the truncation marker) and the trailing
        dtype line are excluded from the check.
        """
        with option_context("display.max_rows", 10):
            res = repr(s)
        # BUG FIX: the original recomputed repr(s) here *outside* the
        # option_context, discarding the `res` captured above, so the check
        # silently ran against the default display options.  Use `res`.
        lines = [line for line in res.split('\n')
                 if not re.match(r'[^\.]*\.+', line)][:-1]
        ncolsizes = len(set(len(line.strip()) for line in lines))
        self.assertEqual(ncolsizes, 1)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_truncate_ndots(self):
        """The truncation marker width ('..' vs '...') must track the width
        of the widest value column."""
        def count_dots(text):
            # Length of the first run of dots in the flattened repr.
            return len(re.match(r'[^\.]*(\.*)', text).groups()[0])

        # Narrow values get a two-dot marker; wider values get three dots.
        for values, expected in [([0, 2, 3, 6], 2), ([0, 100, 200, 400], 3)]:
            with option_context("display.max_rows", 2):
                flat = repr(Series(values)).replace('\n', '')
            self.assertEqual(count_dots(flat), expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_output_significant_digits(self):
        # Issue #9764

        # In case default display precision changes:
        with pd.option_context('display.precision', 6):
            # DataFrame example from issue #9764
            d = pd.DataFrame(
                {'col1': [9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7,
                          5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6,
                          4.999e-6, 5e-6, 5.0001e-6, 6e-6]})

            expected_output = {
                (0, 6):
                '           col1\n0  9.999000e-08\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
                (1, 6):
                '           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
                (1, 8):
                '           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07\n6  5.000100e-07\n7  6.000000e-07',
                (8, 16):
                '            col1\n8   9.999000e-07\n9   1.000000e-06\n10  1.000100e-06\n11  2.000000e-06\n12  4.999000e-06\n13  5.000000e-06\n14  5.000100e-06\n15  6.000000e-06',
                (9, 16):
                '        col1\n9   0.000001\n10  0.000001\n11  0.000002\n12  0.000005\n13  0.000005\n14  0.000005\n15  0.000006'
            }

            for (start, stop), v in expected_output.items():
                self.assertEqual(str(d[start:stop]), v)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_too_long(self):
        # GH 10451
        with pd.option_context('display.precision', 4):
            # need both a number > 1e6 and something that normally formats to
            # having length > display.precision + 6
            df = pd.DataFrame(dict(x=[12345.6789]))
            self.assertEqual(str(df), '            x\n0  12345.6789')
            df = pd.DataFrame(dict(x=[2e6]))
            self.assertEqual(str(df), '           x\n0  2000000.0')
            df = pd.DataFrame(dict(x=[12345.6789, 2e6]))
            self.assertEqual(
                str(df), '            x\n0  1.2346e+04\n1  2.0000e+06')
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_precision(self):
        # With no explicit precision, Styler picks up display.precision.
        with pd.option_context('display.precision', 10):
            styler = Styler(self.df)
        self.assertEqual(styler.precision, 10)

        # An explicit keyword argument overrides the option.
        styler = Styler(self.df, precision=2)
        self.assertEqual(styler.precision, 2)

        # set_precision mutates in place and returns the same object.
        result = styler.set_precision(4)
        self.assertTrue(styler is result)
        self.assertEqual(styler.precision, 4)
项目:scheduled-bots    作者:SuLab    | 项目源码 | 文件源码
def _prep_level_df(df, level):
    """Return the rows of ``df`` at log ``level`` with the Level column
    dropped and external-id / QID cells converted to links."""
    level_df = df.query("Level == '{}'".format(level))
    level_df.is_copy = False  # silence SettingWithCopyWarning on the slice
    del level_df['Level']
    if not level_df.empty:
        level_df = gen_ext_id_links(level_df)
        level_df = url_qid(level_df, "QID")
    return level_df


def _main(log_path, show_browser=False):
    """Render a bot log file into an HTML report saved next to the log.

    :param log_path: path to the log file produced by a bot run
    :param show_browser: if True, open the generated HTML in a web browser
    :return: None
    """
    print(log_path)
    df, metadata = process_log(log_path)
    del df['Timestamp']
    # Escape message text so it is safe to embed in the HTML tables
    df['Msg Type'] = df['Msg Type'].apply(escape_html_chars)
    df['Message'] = df['Message'].apply(escape_html_chars)
    df['Message'] = df.apply(lambda row: format_error(row['Msg Type'], row['Message']), 1)
    # Link each revision id to its wikidata diff page
    df['Rev ID'] = df['Rev ID'].apply(lambda x: '<a href="https://www.wikidata.org/w/index.php?oldid={}&diff=prev">{}</a>'.format(x,x) if x else x)

    level_counts, info_counts, warning_counts, error_counts = generate_summary(df)

    # The per-level frames share the same preparation steps
    warnings_df = _prep_level_df(df, 'WARNING')
    errors_df = _prep_level_df(df, 'ERROR')
    info_df = _prep_level_df(df, 'INFO')
    if not info_df.empty:
        info_df.Message = info_df.Message.str.replace("SKIP", "No Action")

    # NOTE(review): max_colwidth -1 is rejected by pandas >= 2.0; switch to
    # None when the pinned pandas version is upgraded
    with pd.option_context('display.max_colwidth', -1):
        # this class nonsense is an ugly hack: https://stackoverflow.com/questions/15079118/js-datatables-from-pandas/41536906
        level_counts = level_counts.to_frame().to_html(escape=False)
        info_counts = info_counts.to_frame().to_html(escape=False)
        warning_counts = warning_counts.to_frame().to_html(escape=False)
        error_counts = error_counts.to_frame().to_html(escape=False)
        info_df = info_df.to_html(escape=False, classes='df" id = "info_df')
        warnings_df = warnings_df.to_html(escape=False, classes='df" id = "warning_df')
        errors_df = errors_df.to_html(escape=False, classes='df" id = "error_df')

    # BUG FIX: the template file handle was opened without ever being closed
    with open(os.path.join(sys.path[0], "template.html")) as template_file:
        template = Template(template_file.read())

    s = template.render(name=metadata['name'], run_id=metadata['run_id'],
                        level_counts=level_counts,
                        info_counts=info_counts,
                        warning_counts=warning_counts,
                        error_counts=error_counts,
                        warnings_df=warnings_df, errors_df=errors_df, info_df=info_df)
    out_path = log_path.rsplit(".", 1)[0] + ".html"
    with open(out_path, 'w') as f:
        f.write(s)

    if show_browser:
        webbrowser.open(out_path)
项目:rltools    作者:sisl    | 项目源码 | 文件源码
def main():
    """Entry point: print the last row of each rltools training log and
    optionally plot the selected fields."""
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='ret,avglen,ent,kl,vf_r2,ttotal')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()

    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    wanted_fields = args.fields.split(',')

    # Pull the requested fields out of every log file; remote paths
    # (containing ':') are rsynced into /tmp first.
    logs_by_file = {}
    for logfile in args.logfiles:
        if ':' in logfile:
            os.system('rsync -avrz {} /tmp'.format(logfile))
            logfile = os.path.join('/tmp', os.path.basename(logfile))
        with pd.HDFStore(logfile, 'r') as store:
            assert logfile not in logs_by_file
            log_df = store['log']
            log_df.set_index('iter', inplace=True)
            logs_by_file[logfile] = log_df.loc[:args.range_end, wanted_fields]

    # Import matplotlib only when some form of plotting was requested
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('seaborn-colorblind')

    ax = None
    for logfile, log_df in logs_by_file.items():
        # Show the most recent logged iteration for each file
        with pd.option_context('display.max_rows', 9999):
            print(logfile)
            print(log_df[-1:])

        if 'vf_r2' in log_df.keys():
            log_df['vf_r2'] = np.maximum(0, log_df['vf_r2'])

        if not args.noplot:
            if ax is None:
                ax = log_df.plot(subplots=True, title=','.join(args.logfiles))
            else:
                log_df.plot(subplots=True, title=','.join(args.logfiles), ax=ax, legend=False)
    if args.plotfile is not None:
        plt.savefig(args.plotfile, transparent=True, bbox_inches='tight', dpi=300)
    elif not args.noplot:
        plt.show()
项目:Titanic    作者:GeoffBreemer    | 项目源码 | 文件源码
def transform(self, X, y=None):
        """Encode the Titanic categorical columns as integers.

        'Embarked' and 'Sex' are mapped to integer codes, a 'Title'
        column is derived from 'Name' and mapped to an aggregated
        integer category, and 'Name' is dropped.

        NOTE(review): mutates the columns of the passed-in frame; callers
        that need the original untouched should pass a copy.

        :param X: DataFrame with 'Embarked', 'Sex' and 'Name' columns
        :param y: ignored, present for scikit-learn API compatibility
        :return: the transformed DataFrame (without 'Name')
        """
        # Suppress SettingWithCopyWarning (alternatively: add a X = X.copy()
        with pd.option_context('mode.chained_assignment', None):
            # --- Convert Embarked port to an integer code
            mapping = {'S': 0,
                       'C': 1,
                       'Q': 2,
                       }
            X.loc[:, 'Embarked'] = X.loc[:, 'Embarked'].replace(mapping, inplace=False)

            # --- Convert Sex to an integer code
            mapping = {'female': 0,
                       'male': 1
                       }
            X.loc[:, 'Sex'] = X['Sex'].replace(mapping, inplace=False)

            # --- Extract the title ("Mr", "Miss", ...) from the Name:
            # the token between the first ',' and the following '.'
            X.loc[:, 'Title'] = X['Name'].map(lambda name: name.split(',')[1].split('.')[0].strip())

            # a map of more aggregated titles
            mapping = {
                "Capt": 0,  # Officer
                "Col": 0,  # Officer
                "Major": 0,  # Officer
                "Jonkheer": 1,  # Royalty
                "Don": 1,  # Royalty
                "Sir": 1,  # Royalty
                "Dr": 0,  # Officer
                "Rev": 0,  # Officer
                "the Countess": 1,  # Royalty
                "Dona": 1,  # Royalty
                "Mme": 2,  # "Mrs"
                "Mlle": 3,  # "Miss"
                "Ms": 2,  # "Mrs"
                "Mr": 4,  # "Mr"
                "Mrs": 2,  # "Mrs"
                "Miss": 3,  # "Miss"
                "Master": 5,  # "Master"
                "Lady": 1  # "Royalty"
            }
            X.loc[:, 'Title'] = X['Title'].map(mapping)

        # BUG FIX: positional `axis` argument was removed in pandas 2.0;
        # use the keyword form (behavior unchanged: drop the Name column).
        X = X.drop('Name', axis=1)
        return X
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_representation_to_series(self):
        # Pair each DatetimeIndex with the exact repr expected for a Series
        # built from it: empty, naive with freq, tz-aware, containing NaT,
        # and mixed wall times without a freq.
        cases = [
            (DatetimeIndex([], freq='D'),
             """Series([], dtype: datetime64[ns])"""),
            (DatetimeIndex(['2011-01-01'], freq='D'),
             """0   2011-01-01
dtype: datetime64[ns]"""),
            (DatetimeIndex(['2011-01-01', '2011-01-02'], freq='D'),
             """0   2011-01-01
1   2011-01-02
dtype: datetime64[ns]"""),
            (DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                           freq='D'),
             """0   2011-01-01
1   2011-01-02
2   2011-01-03
dtype: datetime64[ns]"""),
            (DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                            '2011-01-01 11:00'], freq='H', tz='Asia/Tokyo'),
             """0   2011-01-01 09:00:00+09:00
1   2011-01-01 10:00:00+09:00
2   2011-01-01 11:00:00+09:00
dtype: datetime64[ns, Asia/Tokyo]"""),
            (DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', pd.NaT],
                           tz='US/Eastern'),
             """0   2011-01-01 09:00:00-05:00
1   2011-01-01 10:00:00-05:00
2                         NaT
dtype: datetime64[ns, US/Eastern]"""),
            (DatetimeIndex(['2011-01-01 09:00', '2011-01-02 10:15']),
             """0   2011-01-01 09:00:00
1   2011-01-02 10:15:00
dtype: datetime64[ns]"""),
        ]

        # A wide display keeps every repr on its natural lines
        with pd.option_context('display.width', 300):
            for index, expected in cases:
                self.assertEqual(repr(Series(index)), expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_repr_max_columns_max_rows(self):
        term_width, term_height = get_terminal_size()
        if term_width < 10 or term_height < 10:
            raise nose.SkipTest("terminal size too small, "
                                "{0} x {1}".format(term_width, term_height))

        def make_frame(n):
            # n x n frame of zeros with zero-padded string labels
            labels = ['%05d' % i for i in range(n)]
            return DataFrame(0, labels, labels)

        df6 = make_frame(6)
        df10 = make_frame(10)
        with option_context('mode.sim_interactive', True):
            with option_context('display.width', term_width * 2):
                with option_context('display.max_rows', 5,
                                    'display.max_columns', 5):
                    self.assertFalse(has_expanded_repr(make_frame(4)))
                    self.assertFalse(has_expanded_repr(make_frame(5)))
                    self.assertFalse(has_expanded_repr(df6))
                    self.assertTrue(has_doubly_truncated_repr(df6))

                with option_context('display.max_rows', 20,
                                    'display.max_columns', 10):
                    # past the max_columns boundary, but no expansion since
                    # the frame still fits within display.width
                    self.assertFalse(has_expanded_repr(df6))
                    self.assertFalse(has_truncated_repr(df6))

                with option_context('display.max_rows', 9,
                                    'display.max_columns', 10):
                    # exceeding vertical bounds cannot produce an expanded repr
                    self.assertFalse(has_expanded_repr(df10))
                    self.assertTrue(has_vertically_truncated_repr(df10))

            # width=None in terminal, auto detection
            with option_context('display.max_columns', 100, 'display.max_rows',
                                term_width * 20, 'display.width', None):
                narrow = make_frame((term_width // 7) - 2)
                self.assertFalse(has_expanded_repr(narrow))
                wide = make_frame((term_width // 7) + 2)
                com.pprint_thing(wide._repr_fits_horizontal_())
                self.assertTrue(has_expanded_repr(wide))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def to_clipboard(obj, excel=None, sep=None, **kwargs):  # pragma: no cover
    """
    Attempt to write text representation of object to the system clipboard
    The clipboard can be then pasted into Excel for example.

    Parameters
    ----------
    obj : the object to write to the clipboard
    excel : boolean, defaults to True
            if True, use the provided separator, writing in a csv
            format for allowing easy pasting into excel.
            if False, write a string representation of the object
            to the clipboard
    sep : optional, defaults to tab
    other keywords are passed to to_csv

    Notes
    -----
    Requirements for your platform
      - Linux: xclip, or xsel (with gtk or PyQt4 modules)
      - Windows:
      - OS X:
    """
    from pandas.util.clipboard import clipboard_set
    if excel is None:
        excel = True

    if excel:
        # Best-effort CSV path: on any failure fall through to the
        # plain-string path below.
        try:
            if sep is None:
                sep = '\t'
            buf = StringIO()
            obj.to_csv(buf, sep=sep, **kwargs)
            clipboard_set(buf.getvalue())
            return
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt / SystemExit; keep the deliberate
            # fallback but only for ordinary errors.
            pass

    if isinstance(obj, DataFrame):
        # str(df) has various unhelpful defaults, like truncation
        with option_context('display.max_colwidth', 999999):
            objstr = obj.to_string(**kwargs)
    else:
        objstr = str(obj)
    clipboard_set(objstr)
项目:anirban-imitation    作者:Santara    | 项目源码 | 文件源码
def main():
    """Print the last row of each training log and optionally plot fields.

    Command-line interface: one or more HDF5 log files, a comma-separated
    field list, and flags controlling interactive display / file output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('logfiles', type=str, nargs='+')
    parser.add_argument('--fields', type=str, default='trueret,avglen,ent,kl,vf_r2,vf_kl,tdvf_r2,rloss,racc')
    parser.add_argument('--noplot', action='store_true')
    parser.add_argument('--plotfile', type=str, default=None)
    parser.add_argument('--range_end', type=int, default=None)
    args = parser.parse_args()

    assert len(set(args.logfiles)) == len(args.logfiles), 'Log files must be unique'

    fields = args.fields.split(',')

    # Load the requested fields (up to --range_end iterations) from each file
    fname2log = {}
    for fname in args.logfiles:
        with pd.HDFStore(fname, 'r') as f:
            assert fname not in fname2log
            df = f['log']
            df.set_index('iter', inplace=True)
            fname2log[fname] = df.loc[:args.range_end, fields]

    # Print and plot; nothing happens when --noplot is given without
    # a --plotfile (original behavior preserved)
    if not args.noplot or args.plotfile is not None:
        import matplotlib
        if args.plotfile is not None:
            # select a non-interactive backend before pyplot is imported
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.style.use('ggplot')

        ax = None
        for fname, df in fname2log.items():
            with pd.option_context('display.max_rows', 9999):
                # BUG FIX: py2-only print statements -> py2/py3-compatible
                print(fname)
                print(df[-1:])

            # BUG FIX: guard the column like the companion script so a
            # custom --fields without vf_r2 does not raise KeyError
            if 'vf_r2' in df.keys():
                df['vf_r2'] = np.maximum(0, df['vf_r2'])

            if ax is None:
                ax = df.plot(subplots=True, title=fname)
            else:
                df.plot(subplots=True, title=fname, ax=ax, legend=False)
        # BUG FIX: save before show; plt.show() blocks and tears down the
        # figures, which could leave --plotfile blank
        if args.plotfile is not None:
            plt.savefig(args.plotfile, bbox_inches='tight', dpi=200)
        if not args.noplot:
            plt.show()
项目:ramp-workflow    作者:paris-saclay-cds    | 项目源码 | 文件源码
def _print_df_scores(df_scores, score_types, indent=''):
    """Pretty print the scores dataframe.

    Parameters
    ----------
    df_scores : pd.DataFrame
        the score dataframe
    score_types : list of score types
        a list of score types to use
    indent : str, default=''
        indentation if needed
    """
    try:
        # try to re-order columns/rows in the printed array
        # we may not have all train, valid, test, so need to select
        index_order = np.array(['train', 'valid', 'test'])
        ordered_index = index_order[np.isin(index_order, df_scores.index)]
        df_scores = df_scores.loc[
            ordered_index, [score_type.name for score_type in score_types]]
    except Exception:
        _print_warning("Couldn't re-order the score matrix..")
    # widen the display so each repr line corresponds to one table row
    # rather than being wrapped
    with pd.option_context("display.width", 160):
        df_repr = repr(df_scores)
    df_repr_out = []
    # the first two entries of the zipped list are None for the repr's
    # header lines; the remaining lines are colored by their row label
    for line, color_key in zip(df_repr.splitlines(),
                               [None, None] +
                               list(df_scores.index.values)):
        # presumably the index-name line when the frame is indexed by
        # 'step' -- TODO confirm against callers
        if line.strip() == 'step':
            continue
        if color_key is None:
            # table header
            line = stylize(line, fg(fg_colors['title']) + attr('bold'))
        if color_key is not None:
            tokens = line.split()
            tokens_bak = tokens[:]
            if 'official_' + color_key in fg_colors:
                # line label and official score bold & bright
                label_color = fg(fg_colors['official_' + color_key])
                tokens[0] = stylize(tokens[0], label_color + attr('bold'))
                tokens[1] = stylize(tokens[1], label_color + attr('bold'))
            if color_key in fg_colors:
                # other scores pale
                tokens[2:] = [stylize(token, fg(fg_colors[color_key]))
                              for token in tokens[2:]]
            # NOTE(review): str.replace substitutes the first occurrence of
            # each token; identical score strings on one line could collide
            # -- confirm this is acceptable for the expected score values
            for token_from, token_to in zip(tokens_bak, tokens):
                line = line.replace(token_from, token_to)
        line = indent + line
        df_repr_out.append(line)
    print('\n'.join(df_repr_out))