我们从 Python 开源项目中提取了以下 5 个代码示例,用于说明如何使用 pandas.read_msgpack()。
def __init__(self, path=None, lock=None, clean_on_failure=True, serialization='msgpack'):
    """Set up the storage directory, lock and (de)serialization callables.

    Args:
        path: Directory to use. When omitted, a new directory is created
            with ``mkdtemp()``.
        lock: Stored as ``self.lock``. When omitted, ``nop_context`` is used.
        clean_on_failure: Stored unchanged as ``self.clean_on_failure``.
        serialization: Either ``'msgpack'`` or ``'pickle[:n]'``. The optional
            ``n`` is parsed with ``int()`` and stored as ``self._protocol``.

    Raises:
        ValueError: If ``serialization`` is neither ``'msgpack'`` nor starts
            with ``'pickle'``, or if the ``n`` suffix is not an integer.
    """
    self.path = mkdtemp() if path is None else path
    self.lock = nop_context if lock is None else lock
    self.clean_on_failure = clean_on_failure

    if serialization == 'msgpack':
        self.serialize = pd.DataFrame.to_msgpack
        self.deserialize = pd.read_msgpack
        self._protocol = None
    else:
        # 'pickle' -> ('pickle', '', ''); 'pickle:2' -> ('pickle', ':', '2').
        fmt, sep, protocol = serialization.partition(':')
        if fmt != 'pickle':
            raise ValueError(
                "'serialization' must be either 'msgpack' or 'pickle[:n]'",
            )
        # A protocol is only parsed when the ':' separator was present.
        self._protocol = int(protocol) if sep else None
        self.serialize = self._serialize_pickle
        self.deserialize = pickle.load

    ensure_directory(self.path)
def df_from_bytes_msgpack_(bytes_: bytes) -> pd.DataFrame:
    """Decode a msgpack payload into a DataFrame.

    Args:
        bytes_: Raw msgpack-encoded bytes.

    Returns:
        The decoded ``pd.DataFrame``.

    Raises:
        DataFrameLoadException: If decoding raises ``UnicodeDecodeError`` or
            the decoded object is not a ``pd.DataFrame``.
    """
    try:
        df = pd.read_msgpack(BytesIO(bytes_))
    except UnicodeDecodeError as err:
        # Chain explicitly so the decoding failure is kept as the cause.
        raise DataFrameLoadException("Not a DataFrame") from err
    if not isinstance(df, pd.DataFrame):
        raise DataFrameLoadException("Not a DataFrame")
    return df
def load_bytes(self, bytestring, data_source=''):
    """Deserialize ``bytestring`` and register each resulting series.

    The loaders are tried in order (``msgpack_lz4_to_series``,
    ``pd.read_msgpack``, ``pickle.loads``) and the first one that succeeds
    decides the result. A Series is used as is, a DataFrame is split into
    its columns, and a list is taken as the list of series. Every series is
    passed to ``self.model.add_dataitem``.

    Args:
        bytestring: Serialized payload handed to each loader.
        data_source: Where the payload came from. It is used in log messages
            and as the name prefix for unnamed series; if it is an existing
            path, only its last path component is used as the prefix.

    Returns:
        None. Failures are reported through ``logger.error``.
    """
    # SECURITY: pickle.loads can execute arbitrary code on untrusted input.
    # It is kept as the last-resort loader; only feed trusted data here.
    load_methods = [
        msgpack_lz4_to_series,
        pd.read_msgpack,
        pickle.loads,
    ]
    seria = None
    for loader in load_methods:
        try:
            loaded = loader(bytestring)
        except Exception:
            # Best effort: this loader cannot read the payload, try the next.
            continue
        if isinstance(loaded, pd.Series):
            seria = [loaded]
        elif isinstance(loaded, pd.DataFrame):
            # DataFrame.items() yields (column name, column Series) pairs.
            seria = [column for _, column in loaded.items()]
        elif isinstance(loaded, list):
            seria = loaded
        else:
            # repr() keeps the precision spec valid for any object type.
            logger.error('Unexpected object found: {:.30}... (using deserializer {}'
                         ''.format(repr(loaded), loader))
            return
        # First successful loader wins; do not run the remaining ones.
        break
    if seria is None:
        logger.error('Could not deserialize contents of {} with any of {}'
                     ''.format(data_source, load_methods))
        return
    for idx, series in enumerate(seria):
        if not series.name:
            if os.path.exists(data_source):
                prefix = os.path.split(data_source)[1]
            else:
                prefix = data_source
            series.name = '{}_{}'.format(prefix, idx)
        self.model.add_dataitem(series, name=series.name)
        logger.info('Loaded "{n}" ({v} values) from {src}'
                    ''.format(n=series.name, v=len(series), src=data_source))
def df_multi_reader(filename: str, limit: bool=False) -> PandasDF:
    """Read a DataFrame in the format selected by ``settings.DATA_TYPE``.

    The file path is ``filename`` plus a format-specific suffix, except for
    ``"hdfone"``, which reads key ``filename`` from a single file under
    ``settings.DATA_PATH``. If the file does not exist, or the data type is
    not recognised, an empty DataFrame is returned.

    Args:
        filename: Base path without extension; also used as the HDF key.
        limit: When true and the frame has rows, keep only what
            ``df.last(settings.LIMIT_MONTHS)`` selects.

    Returns:
        The loaded DataFrame. Any exception is printed, not raised, and the
        frame held at that point is returned.
    """
    df = DataFrame()
    try:
        assert isinstance(filename, str), "filename isn't string %s" % filename
        assert isinstance(limit, bool), "limit isn't bool %s" % limit

        data_type = settings.DATA_TYPE
        if data_type == "pickle":
            path = filename + ".mp"
            if isfile(path):
                df = read_pickle(path)
        elif data_type == "proto2":
            path = filename + ".pr2"
            if isfile(path):
                df = read_pickle(path)
        elif data_type == "messagepack":
            path = filename + ".pack"
            if isfile(path):
                df = read_msgpack(path)
        elif data_type == "json":
            path = filename + ".json"
            if isfile(path):
                df = read_json(path)
        elif data_type == "feather":
            # TODO feather doesn't handle indexes
            path = filename + ".fth"
            if isfile(path):
                df = read_feather(path).reset_index()
        elif data_type == "hdf":
            path = filename + ".hdf"
            if isfile(path):
                df = read_hdf(path, key=filename)
        elif data_type == "hdfone":
            path = join(settings.DATA_PATH, "hdfone.hdfone")
            if isfile(path):
                df = read_hdf(path, key=filename, mode='r')

        # Nothing to trim when no rows were loaded.
        if limit and len(df.index) > 0:
            df = df.last(settings.LIMIT_MONTHS)
    except Exception as err:
        print(colored.red("MultiReader {}".format(err)))
    return df
def nonasy_df_multi_reader(filename: str, limit: bool=False) -> PandasDF:
    """Read a DataFrame in the format selected by ``settings.DATA_TYPE``.

    The file path is ``filename`` plus a format-specific suffix, except for
    ``"hdfone"``, which reads key ``filename`` from a single file under
    ``settings.DATA_PATH``. If the file does not exist, or the data type is
    not recognised, an empty DataFrame is returned.

    Args:
        filename: Base path without extension; also used as the HDF key.
        limit: When true and the frame has rows, keep only what
            ``df.last(settings.LIMIT_MONTHS)`` selects.

    Returns:
        The loaded DataFrame. Any exception is printed, not raised, and the
        frame held at that point is returned.
    """
    df = DataFrame()
    try:
        assert isinstance(filename, str), "filename isn't string: %s" % filename
        assert isinstance(limit, bool), "limit isn't bool: %s" % limit
        if settings.DATA_TYPE == "pickle":
            f = filename + ".mp"
            if isfile(f):
                df = read_pickle(f)
        if settings.DATA_TYPE == "proto2":
            f = filename + ".pr2"
            if isfile(f):
                df = read_pickle(f)
        if settings.DATA_TYPE == "messagepack":
            f = filename + ".pack"
            if isfile(f):
                df = read_msgpack(f)
        if settings.DATA_TYPE == "json":
            f = filename + ".json"
            if isfile(f):
                df = read_json(f)
        if settings.DATA_TYPE == "feather":
            # TODO feather doesn't handle indexes
            f = filename + ".fth"
            if isfile(f):
                df = read_feather(f).reset_index()
        if settings.DATA_TYPE == "hdf":
            f = filename + ".hdf"
            if isfile(f):
                df = read_hdf(f, key=filename)
        if settings.DATA_TYPE == "hdfone":
            f = join(settings.DATA_PATH, "hdfone.hdfone")
            if isfile(f):
                df = read_hdf(f, key=filename, mode='r')

        if limit:
            # Same guard as df_multi_reader: skip .last() when no rows were
            # loaded, so a missing file does not print a spurious error.
            if len(df.index) > 0:
                df = df.last(settings.LIMIT_MONTHS)
    except Exception as err:
        print(colored.red("MultiReader {}".format(err)))
    return df