Python pandas 模块,__version__ 属性实例源码

我们从Python开源项目中,提取了以下22个代码示例,用于说明如何使用pandas.__version__(这是一个属性,不是可调用的函数)

项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def write_legacy_pickles(output_dir):
    """Write a pickle of the reference data set into ``output_dir``.

    The file is named ``<platform_name()>.pickle`` and contains whatever
    ``create_pickle_data()`` returns, dumped with
    ``pickle.HIGHEST_PROTOCOL``. Progress information is printed to stdout.

    Parameters
    ----------
    output_dir : str
        Directory in which the pickle file is created.
    """

    # make sure we are < 0.13 compat (in py3)
    try:
        from pandas.compat import zip, cPickle as pickle  # noqa
    except ImportError:
        # Only a failed import should trigger the fallback; a bare except
        # would also hide KeyboardInterrupt/SystemExit.
        import pickle

    version = pandas.__version__

    print("This script generates a storage file for the current arch, system, "
          "and python version")
    print("  pandas version: {0}".format(version))
    print("  output dir    : {0}".format(output_dir))
    print("  storage format: pickle")

    pth = '{0}.pickle'.format(platform_name())

    # The context manager closes the handle even if building or dumping the
    # data raises.
    with open(os.path.join(output_dir, pth), 'wb') as fh:
        pickle.dump(create_pickle_data(), fh, pickle.HIGHEST_PROTOCOL)

    print("created pickle file: %s" % pth)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_nan_selection_bug_4858(self):
        # GH 4858: the nan selection bug is only fixed for pytables >= 3.1,
        # so the test is skipped on anything older.
        installed_tables = LooseVersion(tables.__version__)
        if installed_tables < '3.1.0':
            raise nose.SkipTest('tables version does not support fix for nan '
                                'selection bug: GH 4858')

        with ensure_clean_store(self.path) as store:

            # Float frame whose 'cols' column is turned into strings and
            # whose first row is then blanked out with NaN.
            frame = DataFrame({'cols': range(6), 'values': range(6)},
                              dtype='float64')
            shifted = frame['cols'] + 10
            frame['cols'] = shifted.apply(str)
            frame.iloc[0] = np.nan

            # Rows the 'values>2.0' selection below is expected to return.
            expected = DataFrame({'cols': ['13.0', '14.0', '15.0'],
                                  'values': [3., 4., 5.]},
                                 index=[3, 4, 5])

            # write w/o the index on that particular column
            store.append('df', frame, data_columns=True, index=['cols'])
            selected = store.select('df', where='values>2.0')
            assert_frame_equal(selected, expected)
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def test_legacy_table_write(self):
        # This test is unconditionally skipped.
        raise nose.SkipTest("cannot write legacy tables")

        # NOTE: everything below is unreachable because of the raise above.
        # It shows how a legacy table file named after the running pandas
        # version would be written: a MultiIndex frame under 'mi', a string
        # frame under 'df' and a panel under 'wp'.
        store = HDFStore(tm.get_data_path(
            'legacy_hdf/legacy_table_%s.h5' % pandas.__version__), 'a')

        df = tm.makeDataFrame()
        wp = tm.makePanel()

        # Two-level index with 10 entries; `df` from above is replaced here.
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])
        store.append('mi', df)

        # Constant string columns; 'B' is stored as a data column and 'A'
        # gets a minimum item size of 200.
        df = DataFrame(dict(A='foo', B='bar'), index=lrange(10))
        store.append('df', df, data_columns=['B'], min_itemsize={'A': 200})
        store.append('wp', wp)

        store.close()
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def get_pandas_status():
    """Check the installed pandas against ``pandas_min_version``.

    Returns the result of ``_check_version`` for the installed pandas
    version. If an ``ImportError`` is raised, the traceback is printed and
    ``default_status`` is returned instead.
    """
    try:
        import pandas
        status = _check_version(pandas.__version__, pandas_min_version)
    except ImportError:
        traceback.print_exc()
        status = default_status
    return status
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def get_sklearn_status():
    """Check the installed scikit-learn against ``sklearn_min_version``.

    Returns the result of ``_check_version`` for the installed sklearn
    version. If an ``ImportError`` is raised, the traceback is printed and
    ``default_status`` is returned instead.
    """
    try:
        import sklearn
        status = _check_version(sklearn.__version__, sklearn_min_version)
    except ImportError:
        traceback.print_exc()
        status = default_status
    return status
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def get_numpy_status():
    """Check the installed numpy against ``numpy_min_version``.

    Returns the result of ``_check_version`` for the installed numpy
    version. If an ``ImportError`` is raised, the traceback is printed and
    ``default_status`` is returned instead.
    """
    try:
        import numpy
        status = _check_version(numpy.__version__, numpy_min_version)
    except ImportError:
        traceback.print_exc()
        status = default_status
    return status
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def get_scipy_status():
    """Check the installed scipy against ``scipy_min_version``.

    Returns the result of ``_check_version`` for the installed scipy
    version. If an ``ImportError`` is raised, the traceback is printed and
    ``default_status`` is returned instead.
    """
    try:
        import scipy
        status = _check_version(scipy.__version__, scipy_min_version)
    except ImportError:
        traceback.print_exc()
        status = default_status
    return status
项目:skutil    作者:tgsmith61591    | 项目源码 | 文件源码
def get_h2o_status():
    """Check the installed h2o against ``h2o_min_version``.

    Returns the result of ``_check_version`` for the installed h2o version.
    If an ``ImportError`` is raised, the traceback is printed and
    ``default_status`` is returned instead.
    """
    try:
        import h2o as h2o_module
        status = _check_version(h2o_module.__version__, h2o_min_version)
    except ImportError:
        traceback.print_exc()
        status = default_status
    return status
项目:SecuML    作者:ANSSI-FR    | 项目源码 | 文件源码
def sortDataFrame(df, column, ascending, inplace):
    """Sort ``df`` by a single column, whatever the pandas version offers.

    ``DataFrame.sort_values`` is used when the frame provides it; otherwise
    the older ``DataFrame.sort`` is called. Detecting the method instead of
    matching two exact version strings keeps every other pandas release that
    lacks ``sort_values`` working as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to sort.
    column : label
        Column to sort by.
    ascending : bool
        Sort direction.
    inplace : bool
        Forwarded to pandas; when true the frame is sorted in place.

    Returns
    -------
    Whatever the pandas sort call returns: the sorted frame, or ``None``
    when ``inplace`` is true.
    """
    sort_method = getattr(df, 'sort_values', None)
    if sort_method is None:
        # Older pandas without sort_values: fall back to DataFrame.sort.
        sort_method = df.sort
    return sort_method([column], ascending=[ascending], inplace=inplace)
项目:image-app    作者:ajay-d    | 项目源码 | 文件源码
def print_my_path():
    """Print the working directory, this file's location and library versions."""
    working_dir = getcwd()
    print('cwd:     {}'.format(working_dir))
    print('__file__:{}'.format(__file__))
    absolute_location = path.abspath(__file__)
    print('abspath: {}'.format(absolute_location))

    tensorflow_release = tf.__version__
    print('tensorflow: {}'.format(tensorflow_release))
    pandas_release = pd.__version__
    print('pandas: {}'.format(pandas_release))
项目:auto_ml    作者:ClimbsRocks    | 项目源码 | 文件源码
def add_date_features_df(col_data, date_col):
    """Derive date/time feature columns from a column of dates.

    ``col_data`` is converted with ``pd.to_datetime`` and the following
    entries are produced, each keyed by ``date_col`` plus a suffix:

    * ``_day_of_week``      - ``weekday()`` of each value, cast to int
    * ``_hour``             - hour of each value, cast to int
    * ``_minutes_into_day`` - ``hour * 60 + minute``
    * ``_is_weekend``       - whether ``weekday()`` is 5 or 6
    * ``_day_part``         - ``minutes_into_day_parts`` applied to the
      minutes column

    Missing values in every produced column are filled with 0.

    Parameters
    ----------
    col_data : pandas.Series
        The raw date column.
    date_col : str
        Name of the column; used as prefix for the generated keys.

    Returns
    -------
    dict
        Maps the generated feature names to their columns. If the
        hour/minute features raise ``AttributeError``, the entries that were
        not built (and ``_day_part``, which depends on the minutes column)
        are left out.
    """
    result = {}

    col_data = pd.to_datetime(col_data)

    # astype's "do not raise" switch is spelled differently on either side of
    # pandas 0.20.0; pick the right keyword once instead of at every call.
    # NOTE(review): this is a plain string comparison of version numbers,
    # kept as in the original - confirm it is adequate for the supported
    # pandas range.
    if pandas_version < '0.20.0':
        astype_kwargs = {'raise_on_error': False}
    else:
        astype_kwargs = {'errors': 'ignore'}

    day_of_week_key = date_col + '_day_of_week'
    hour_key = date_col + '_hour'
    minutes_key = date_col + '_minutes_into_day'
    weekend_key = date_col + '_is_weekend'
    day_part_key = date_col + '_day_part'

    result[day_of_week_key] = col_data.apply(lambda x: x.weekday()).astype(int, **astype_kwargs)

    try:
        result[hour_key] = col_data.apply(lambda x: x.hour).astype(int, **astype_kwargs)

        result[minutes_key] = col_data.apply(lambda x: x.hour * 60 + x.minute)

        result[hour_key] = result[hour_key].fillna(0)
        result[minutes_key] = result[minutes_key].fillna(0)
    except AttributeError:
        # Best effort: values without hour/minute simply get no time-of-day
        # features.
        pass

    result[weekend_key] = col_data.apply(lambda x: x.weekday() in (5, 6))

    # The day part is derived from the minutes column, which is absent when
    # the block above was abandoned; indexing it unconditionally would turn
    # the swallowed AttributeError into a KeyError.
    if minutes_key in result:
        result[day_part_key] = result[minutes_key].apply(minutes_into_day_parts)

    result[day_of_week_key] = result[day_of_week_key].fillna(0)
    result[weekend_key] = result[weekend_key].fillna(0)
    if day_part_key in result:
        result[day_part_key] = result[day_part_key].fillna(0)
    return result

# Same logic as above, except implemented for a single dictionary, which is much faster at prediction time when getting just a single prediction
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def platform_name():
    """Return ``<pandas version>_<machine>_<system>_<python version>``.

    The system name is lower-cased; all four parts are joined with
    underscores.
    """
    parts = (
        pandas.__version__,
        pl.machine(),
        pl.system().lower(),
        pl.python_version(),
    )
    return '_'.join(map(str, parts))
项目:PyDataLondon29-EmbarrassinglyParallelDAWithAWSLambda    作者:SignalMedia    | 项目源码 | 文件源码
def write_legacy_msgpack(output_dir, compress):
    """Write the reference data set as a msgpack file into ``output_dir``.

    The file is named ``<platform_name()>.msgpack`` and holds whatever
    ``create_msgpack_data()`` returns. ``compress`` is forwarded to
    ``to_msgpack``. Progress information is printed to stdout.
    """
    pandas_release = pandas.__version__

    print("This script generates a storage file for the current arch, "
          "system, and python version")
    print("  pandas version: {0}".format(pandas_release))
    print("  output dir    : {0}".format(output_dir))
    print("  storage format: msgpack")

    file_name = '{0}.msgpack'.format(platform_name())
    target = os.path.join(output_dir, file_name)
    to_msgpack(target, create_msgpack_data(), compress=compress)

    print("created msgpack file: %s" % file_name)
项目:TrilinosDrivers    作者:jjellio    | 项目源码 | 文件源码
def sanity_check():
  """
  Print the versions of matplotlib, numpy and pandas, one per line.

  :return: Nothing
  """
  import matplotlib
  matplotlib_release = matplotlib.__version__
  print('matplotlib: {}'.format(matplotlib_release))
  numpy_release = np.__version__
  print('numpy: {}'.format(numpy_release))
  pandas_release = pd.__version__
  print('pandas: {}'.format(pandas_release))


###############################################################################
项目:TrilinosDrivers    作者:jjellio    | 项目源码 | 文件源码
def sanity_check():
  """
  Print the versions of matplotlib, numpy and pandas, one per line.

  :return: Nothing
  """
  import matplotlib
  matplotlib_line = 'matplotlib: {}'.format(matplotlib.__version__)
  print(matplotlib_line)
  numpy_line = 'numpy: {}'.format(np.__version__)
  print(numpy_line)
  pandas_line = 'pandas: {}'.format(pd.__version__)
  print(pandas_line)
项目:TrilinosDrivers    作者:jjellio    | 项目源码 | 文件源码
def sanity_check():
  """
  Print the version numbers of the core packages in use
  (matplotlib, numpy, pandas).

  :return: Nothing
  """
  import matplotlib
  version_of_matplotlib = matplotlib.__version__
  print('matplotlib: {}'.format(version_of_matplotlib))
  version_of_numpy = np.__version__
  print('numpy: {}'.format(version_of_numpy))
  version_of_pandas = pd.__version__
  print('pandas: {}'.format(version_of_pandas))


###############################################################################
项目:decoding_challenge_cortana_2016_3rd    作者:kingjr    | 项目源码 | 文件源码
def check_version(library, min_version):
    """Tell whether a library is importable and recent enough.

    Parameters
    ----------
    library : str
        The library name to import. Must have a ``__version__`` property.
    min_version : str
        The minimum version string. Anything that matches
        ``'(\\d+ | [a-z]+ | \\.)'``

    Returns
    -------
    ok : bool
        True if the library exists with at least the specified version.
    """
    try:
        module = __import__(library)
    except ImportError:
        # Not installed at all.
        return False

    installed = LooseVersion(module.__version__)
    # Too old when the installed version sorts below the minimum.
    return not (installed < min_version)
项目:omniduct    作者:airbnb    | 项目源码 | 文件源码
def serialize(cls, formatted_data, fh):
        """Pickle ``formatted_data`` to ``fh`` via ``pd.to_pickle``.

        For pandas <= 0.20.3 the handle is closed and its ``name`` is passed
        instead, because there ``to_pickle`` does not accept file handles.
        Returns whatever ``pd.to_pickle`` returns.
        """
        needs_path = LooseVersion(pd.__version__) <= LooseVersion('0.20.3')
        target = fh
        if needs_path:
            # compat: hand over the path rather than the open handle
            fh.close()
            target = fh.name
        return pd.to_pickle(formatted_data, target)
项目:WellApplication    作者:inkenbrandt    | 项目源码 | 文件源码
def hourly_resample(df, bse=0, minutes=60):
    """
    Args:
        df:
            pandas dataframe containing time series needing resampling
        bse (int):
            base time to set; optional; default is zero (on the hour);
        minutes (int):
            sampling recurrence interval in minutes; optional; default is 60 (hourly samples)
    Returns:
        A Pandas DataFrame that has been resampled to every hour, at the minute defined by the base (bse)
    Description:
        see http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.resample.html for more info
        This function uses pandas powerful time-series manipulation to upsample to every minute, then downsample to every hour,
        on the hour.
        This function will need adjustment if you do not want it to return hourly samples, or if you are sampling more frequently than
        once per minute.
        see http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    """
    # Parse the pandas version once; it decides two API differences below.
    version_parts = str(pd.__version__).split('.')
    major = int(version_parts[0])
    # Before 0.18 resample() aggregated directly; from 0.18 on it returns a
    # resampler object that needs an explicit aggregation such as .first().
    legacy_resample = major == 0 and int(version_parts[1]) < 18
    # resample() gained `offset` in pandas 1.1 as the replacement for `base`,
    # and `base` was removed in 2.0, so prefer `offset` whenever it exists.
    has_offset = major > 1 or (major == 1 and int(version_parts[1]) >= 1)

    # you can make this smaller to accommodate a higher sampling frequency
    df = df.resample('1Min')
    if not legacy_resample:
        df = df.first()

    # http://pandas.pydata.org/pandas-docs/dev/generated/pandas.Series.interpolate.html
    df = df.interpolate(method='time', limit=90)

    if has_offset:
        # For a minute-based rule, `base` counted minutes; the same shift is
        # expressed as a Timedelta for `offset`.
        anchor = {'offset': pd.Timedelta(minutes=bse)}
    else:
        anchor = {'base': bse}

    # modify `minutes` to change the resulting frequency
    df = df.resample(str(minutes) + 'Min', closed='left', label='left', **anchor)
    if not legacy_resample:
        df = df.first()
    return df
项目:psyplot    作者:Chilipp    | 项目源码 | 文件源码
def _get_versions(requirements=True):
    """Return this package's version, optionally with its requirements.

    The result always has a ``'version'`` entry holding ``__version__``.
    When ``requirements`` is true, a ``'requirements'`` entry is added that
    maps matplotlib, xarray, pandas and numpy to their versions and
    ``'python'`` to ``sys.version`` with its lines joined by spaces.
    """
    if not requirements:
        return {'version': __version__}

    import matplotlib as mpl
    import xarray as xr
    import pandas as pd
    import numpy as np

    versions = {'version': __version__}
    versions['requirements'] = {
        'matplotlib': mpl.__version__,
        'xarray': xr.__version__,
        'pandas': pd.__version__,
        'numpy': np.__version__,
        'python': ' '.join(sys.version.splitlines()),
    }
    return versions
项目:auto_ml    作者:ClimbsRocks    | 项目源码 | 文件源码
def fit(self, X_df, y=None):
        """Inspect the columns of ``X_df`` and fit the text vectorizers.

        For every column this method:

        * warns when an ``object`` column that is not described as
          categorical/ignore/nlp holds values that cannot be parsed as
          floats (judged on a random sample of up to 10 values);
        * records the column as ``'continuous'`` in
          ``self.transformed_column_descriptions`` when no description
          exists yet;
        * for columns listed in ``self.text_columns``, fills missing values
          with ``'nan'``, fits the column's vectorizer and stores cleaned
          feature names on it as ``cleaned_feature_names``.

        Parameters
        ----------
        X_df : pandas.DataFrame
            Training data. Text columns have their missing values replaced
            in this frame.
        y : ignored
            Accepted but not used.

        Returns
        -------
        self
        """
        print('Running basic data cleaning')

        self.vals_to_drop = set(['ignore', 'output', 'regressor', 'classifier'])

        # sample() raises when asked for more rows than exist, so the sample
        # sizes below are capped at the number of rows available.
        n_rows = len(X_df)

        # See if we should fit TfidfVectorizer or not
        for key in X_df.columns:

            if X_df[key].dtype == 'object' and self.column_descriptions.get(key, False) not in ['categorical', 'ignore', 'nlp']:

                # First, make sure that the values in this column are not just ints, or float('nan')
                vals = X_df[key].sample(n=min(10, n_rows))
                is_categorical = False
                for val in vals:
                    try:
                        if val is not None:
                            float(val)
                    except Exception as e:
                        print(e)
                        is_categorical = True

                if is_categorical:
                    print('\n')
                    print('Encountered a column that is not marked as categorical, but is an "object" pandas type, which typically indicates a categorical column.')
                    print('The name of this columns is: "{}"'.format(key))
                    print('Some example features in this column are: {}'.format(list(X_df[key].sample(n=min(5, n_rows)))))
                    print('If this is a categorical column, please mark it as `{}: "categorical"` as part of your column_descriptions'.format(key))
                    print('If this is not a categorical column, please consider converting its dtype before passing data into auto_ml')
                    print('\n')
                    warnings.warn('Consider marking the "{}" column as categorical'.format(key))

            if self.transformed_column_descriptions.get(key) is None:
                self.transformed_column_descriptions[key] = 'continuous'

            if key in self.text_columns:
                # Assign the filled column back instead of calling
                # fillna(inplace=True) on a column selection, which is not
                # guaranteed to write through to the frame.
                X_df[key] = X_df[key].fillna('nan')
                if pandas_version < '0.20.0':
                    text_col = X_df[key].astype(str, raise_on_error=False)
                else:
                    text_col = X_df[key].astype(str, errors='ignore')
                self.text_columns[key].fit(text_col)

                col_names = self.text_columns[key].get_feature_names()

                # Make weird characters play nice, or just ignore them :)
                for idx, word in enumerate(col_names):
                    try:
                        col_names[idx] = str(word)
                    except Exception:
                        # Exception (not a bare except) so that
                        # KeyboardInterrupt/SystemExit still propagate.
                        col_names[idx] = 'non_ascii_word_' + str(idx)

                col_names = ['nlp_' + key + '_' + str(word) for word in col_names]

                self.text_columns[key].cleaned_feature_names = col_names

        return self
项目:pyndl    作者:quantling    | 项目源码 | 文件源码
def _attributes(event_path, number_events, alpha, betas, lambda_, cpu_time,
                wall_time, function, method=None, attrs=None):
    """Build the dictionary of run attributes, optionally merged with old ones.

    Every value is left-aligned and padded with spaces to a common width:
    the longest of the listed values (event path, parameters, function,
    method, host name, user name), but at least 19 characters.

    Parameters
    ----------
    event_path : str
        Path of the event file.
    number_events
        Number of events; formatted as is.
    alpha
        Stored as its string form when it is exactly a ``float`` or ``int``,
        otherwise stored as ``'varying'``.
    betas, lambda_, cpu_time, wall_time
        Stored as their string form.
    function : str
        Name of the function that was run.
    method : optional
        Stored as its string form.
    attrs : mapping, optional
        Previously stored attributes. When given, every key found in either
        mapping is stored as ``'<old> | <new>'``; a side that lacks the key
        contributes an empty old value or a blank padded new value.

    Returns
    -------
    dict
        The new (or merged) attributes.
    """
    hostname = socket.gethostname()
    username = getpass.getuser()

    # The width is computed from the raw alpha, before it may be replaced by
    # 'varying' below.
    width = max([len(ss) for ss in (event_path,
                                    str(number_events),
                                    str(alpha),
                                    str(betas),
                                    str(lambda_),
                                    function,
                                    str(method),
                                    hostname,
                                    username)])
    width = max(19, width)

    def _format(value):
        return '{0: <{width}}'.format(value, width=width)

    # Exact type check on purpose: anything that is not a plain float/int is
    # reported as 'varying'.
    if type(alpha) not in (float, int):
        alpha = 'varying'

    new_attrs = {'date': _format(time.strftime("%Y-%m-%d %H:%M:%S")),
                 'event_path': _format(event_path),
                 'number_events': _format(number_events),
                 'alpha': _format(str(alpha)),
                 'betas': _format(str(betas)),
                 'lambda': _format(str(lambda_)),
                 'function': _format(function),
                 'method': _format(str(method)),
                 'cpu_time': _format(str(cpu_time)),
                 'wall_time': _format(str(wall_time)),
                 'hostname': _format(hostname),
                 'username': _format(username),
                 'pyndl': _format(__version__),
                 'numpy': _format(np.__version__),
                 'pandas': _format(pd.__version__),
                 'xarray': _format(xr.__version__),
                 'cython': _format(cython.__version__)}

    if attrs is not None:
        for key in set(attrs.keys()) | set(new_attrs.keys()):
            old_val = attrs[key] if key in attrs else ''
            # Keys present only in the old attributes get a blank padded
            # value. The helper is `_format`; the original called the
            # undefined name `format_` here and raised NameError.
            new_val = new_attrs[key] if key in new_attrs else _format('')
            new_attrs[key] = old_val + ' | ' + new_val
    return new_attrs