Python pandas module: HDFStore() code examples

The following code examples, extracted from open-source Python projects, illustrate how to use pandas.HDFStore().
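All of the examples below follow the same basic pattern: open a store, put or get a DataFrame under a key, and close the store. A minimal sketch of that pattern (file name and key are placeholders), using the context manager so the file handle is released even on error:

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': [4.0, 5.0, 6.0]})

# Write: the context manager closes the file even if an exception is raised.
with pd.HDFStore('example.h5', mode='w') as store:
    store.put('my_frame', df)

# Read it back; HDFStore reports keys with a leading '/'.
with pd.HDFStore('example.h5', mode='r') as store:
    print(store.keys())          # ['/my_frame']
    loaded = store['my_frame']   # equivalent to store.get('my_frame')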

Project: scikit-dataaccess | Author: MITHaystack
def getStationMetadata():
        '''
        Retrieve metadata on groundwater wells

        @return pandas dataframe with groundwater well information
        '''

        data_file = DataFetcher.getDataLocation('groundwater')
        if data_file is None:
            print('Dataset not available')
            return None

        store = pd.HDFStore(data_file,'r')
        meta_data = store['meta_data']
        store.close()


        return meta_data
Project: scikit-dataaccess | Author: MITHaystack
def getAntennaLogs():
        '''
        Retrieve information about antenna changes

        @return dictionary of antenna changes
        '''
        store_location = data_util.getDataLocation('ngl_gps')
        store = pd.HDFStore(store_location, 'r')
        logs_df = store['ngl_steps']
        store.close()

        metadata = DataFetcher.getStationMetadata()

        logs_dict = OrderedDict()

        for station in metadata.index:
            offset_dates = logs_df[logs_df['Station']==station].index.unique()
            offset_dates = pd.Series(offset_dates)
            logs_dict[station] = offset_dates

        return logs_dict
Project: future-price-predictor | Author: htfy96
def __init__(self, db_h5_file,
                 future_time = 20, lookback = 100, read_first_k_table = -1, normalize=True, two_class=False):
        super(DBGeneticReader, self).__init__()
        self._db = pd.HDFStore(db_h5_file)
        self._future_time = future_time
        self._lookback = lookback
        self._db_len = 0
        self._two_class = two_class

        self._tables = []
        for k in self._db:
            self._db_len += len(self._db[k]) - future_time - lookback
            t = self._db[k].iloc[:, 4:].astype('float32')
            t['AveragePrice'] = (t['AskPrice1'] + t['BidPrice1']) / 2
            if normalize:
                t = (t - t.mean()) / (t.std() + 1e-10)
            self._tables.append(t)
            if read_first_k_table != -1 and len(self._tables) == read_first_k_table:
                break
Project: triage | Author: dssg
def _read_hdf_from_buffer(self, buffer):
        with pandas.HDFStore(
                "data.h5",
                mode="r",
                driver="H5FD_CORE",
                driver_core_backing_store=0,
                driver_core_image=buffer.read()) as store:

            if len(store.keys()) > 1:
                raise Exception('Ambiguous matrix store. More than one dataframe in the hdf file.')

            try:
                return store["matrix"]

            except KeyError:
                print("The hdf file should contain one and only key, matrix.")
                return store[store.keys()[0]]
Project: triage | Author: dssg
def fake_metta(matrix_dict, metadata):
    """Stores matrix and metadata in a metta-data-like form

    Args:
    matrix_dict (dict) of form { columns: values }.
        Expects an entity_id to be present which it will use as the index
    metadata (dict). Any metadata that should be set

    Yields:
        tuple of filenames for matrix and metadata
    """
    matrix = pandas.DataFrame.from_dict(matrix_dict).set_index('entity_id')
    with tempfile.NamedTemporaryFile() as matrix_file:
        with tempfile.NamedTemporaryFile('w') as metadata_file:
            hdf = pandas.HDFStore(matrix_file.name)
            hdf.put('title', matrix, data_columns=True)
            matrix_file.seek(0)

            yaml.dump(metadata, metadata_file)
            metadata_file.seek(0)
            yield (matrix_file.name, metadata_file.name)
Project: Two-Sigma-Financial-Modeling-Challenge | Author: xiaofeiwen
def __init__(self):
        with pd.HDFStore("../input/train.h5", "r") as hfdata:
            self.timestamp = 0
            fullset = hfdata.get("train")
            self.unique_timestamp = fullset["timestamp"].unique()
            # Get a list of unique timestamps
            # use the first half for training and
            # the second half for the test set
            n = len(self.unique_timestamp)
            i = int(n/2)
            timesplit = self.unique_timestamp[i]
            self.n = n
            self.unique_idx = i
            self.train = fullset[fullset.timestamp < timesplit]
            self.test = fullset[fullset.timestamp >= timesplit]

            # Needed to compute final score
            self.full = self.test.loc[:, ['timestamp', 'y']]
            self.full['y_hat'] = 0.0
            self.temp_test_y = None
Project: Quantrade | Author: quant-trade
def hdfone_filenames(folder: str, path_to: str) -> List[str]:
    filenames = []
    try:
        assert isinstance(folder, str), "folder isn't string: %s" % folder
        assert isinstance(path_to, str), "path_to isn't string: %s" % path_to

        if settings.DATA_TYPE == "hdfone":
            f = join(settings.DATA_PATH, "hdfone.hdfone")
            if isfile(f):
                with HDFStore(f) as hdf:
                    # distinct loop variable avoids shadowing the file path `f`;
                    # the context manager already closes the store on exit
                    filenames = [key for key in hdf.keys() if folder in key]
        else:
            filenames = multi_filenames(path_to_history=path_to)
    except Exception as err:
        print(colored.red("hdfone_filenames: {}".format(err)))

    return filenames
Project: catalyst | Author: enigmampc
def write(self, frames):
        """
        Write the frames to the target HDF5 file, using the format used by
        ``pd.Panel.to_hdf``

        Parameters
        ----------
        frames : iter[(int, DataFrame)] or dict[int -> DataFrame]
            An iterable or other mapping of sid to the corresponding OHLCV
            pricing data.
        """
        with HDFStore(self._path, 'w',
                      complevel=self._complevel, complib=self._complib) \
                as store:
            panel = pd.Panel.from_dict(dict(frames))
            panel.to_hdf(store, 'updates')
        with tables.open_file(self._path, mode='r+') as h5file:
            h5file.set_node_attr('/', 'version', 0)
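Note that pd.Panel was deprecated in pandas 0.20 and removed in 1.0, so the snippet above only runs on older pandas. A hedged sketch of one modern alternative, storing each sid's frame under its own key instead of a Panel (the key scheme and function name are ours, not catalyst's):

import pandas as pd

def write_frames(path, frames, complevel=5, complib='zlib'):
    # One key per sid stands in for the removed pd.Panel layout.
    with pd.HDFStore(path, 'w', complevel=complevel, complib=complib) as store:
        for sid, frame in dict(frames).items():
            store.put('updates/sid_{}'.format(sid), frame)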
Project: DGP | Author: DynamicGravitySystems
def add_data(self, packet: DataPacket):
        """
        Import a DataFrame into the project
        :param packet: DataPacket custom class containing file path, dataframe, data type and flight association
        :return: Void
        """
        self.log.debug("Ingesting data and exporting to hdf5 store")

        file_uid = 'f' + uuid.uuid4().hex[1:]  # Fixes NaturalNameWarning by ensuring first char is letter ('f').

        with HDFStore(str(self.hdf_path)) as store:
            # Separate data into groups by data type (GPS & Gravity Data)
            # format: 'table' pytables format enables searching/appending, fixed is more performant.
            store.put('{}/{}'.format(packet.data_type, file_uid), packet.data, format='fixed', data_columns=True)
            # Store a reference to the original file path
            self.data_map[file_uid] = packet.path
        try:
            flight = self.flights[packet.flight.uid]
            if packet.data_type == 'gravity':
                flight.gravity = file_uid
            elif packet.data_type == 'gps':
                flight.gps = file_uid
        except KeyError:
            return False
Project: hax | Author: XENON1T
def save_cache_file(data, cache_file, **kwargs):
    """Save minitree dataframe + cut history to a cache file
    Any kwargs will be passed to pandas HDFStore. Defaults are:
        complib='blosc'
        complevel=9
    """
    kwargs.setdefault('complib', 'blosc')
    kwargs.setdefault('complevel', 9)
    dirname = os.path.dirname(cache_file)
    if dirname and not os.path.exists(dirname):
        os.makedirs(dirname)
    store = pd.HDFStore(cache_file, **kwargs)
    store.put('data', data)

    # Store the cuts history for the data
    store.get_storer('data').attrs.cut_history = cuts._get_history(data)
    store.close()
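The cut history rides along as an attribute on the storer, so a loader must read both the frame and the attribute. A minimal counterpart sketch (the function name is ours, not part of hax):

import pandas as pd

def load_cache_file(cache_file):
    # Read back the minitree dataframe and the cut history saved above.
    with pd.HDFStore(cache_file, mode='r') as store:
        data = store.get('data')
        cut_history = store.get_storer('data').attrs.cut_history
    return data, cut_history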
Project: two_sigma_financial_modeling | Author: WayneDW
def __init__(self):
        with pd.HDFStore("../input/train.h5", "r") as hfdata:
            self.timestamp = 0
            fullset = hfdata.get("train")
            self.unique_timestamp = fullset["timestamp"].unique()
            # Get a list of unique timestamps
            # use the first half for training and
            # the second half for the test set
            n = len(self.unique_timestamp)
            i = int(n/2)
            timesplit = self.unique_timestamp[i]
            self.n = n
            self.unique_idx = i
            self.train = fullset[fullset.timestamp < timesplit]
            self.test = fullset[fullset.timestamp >= timesplit]

            self.y_test_full = self.test['y'] # Just in case the full labels are needed later
            self.y_pred_full = []
            self.temp_test_y = None
            self.ID_COL_NAME = 'id'
            self.SAMPLE_COL_NAME = 'sample'
            self.TARGET_COL_NAME = 'y'
            self.TIME_COL_NAME = 'timestamp'
Project: skp_edu_docker | Author: TensorMSA
def _init_frame_node_parm_with_data(self):
        """
        init the parameters that need to be calculated
        :return: None
        """
        try :
            store = pd.HDFStore(self.input_paths[0])
            chunk = store.select('table1',
                                 start=0,
                                 stop=100)

            for col_name in self.encode_col:
                if self.encode_len.get(col_name) is None:
                    if (chunk[col_name].dtype in ['int', 'float']):
                        self.encode_len[col_name] = 1
                        self.input_size = self.input_size + 1
                    else:
                        self.encode_len[col_name] = self.word_vector_size
                        self.input_size = self.input_size + self.word_vector_size
                    self.encode_onehot[col_name] = OneHotEncoder(self.word_vector_size)
                    self.encode_dtype[col_name] = str(chunk[col_name].dtype)
        except Exception as e :
            raise Exception ("error on wcnn feed parm prepare : {0}".format(e))
Project: skp_edu_docker | Author: TensorMSA
def _frame_parser(self, file_path, index):
        """
        parse frame (tabular) data
        :return:
        """
        try :
            store = pd.HDFStore(file_path)
            chunk = store.select('table1',
                                 start=index.start,
                                 stop=index.stop)
            input_vector = []
            count = index.stop - index.start

            for col_name in self.encode_col:
                if (chunk[col_name].dtype == 'O'):
                    input_vector.append(list(map(lambda x: self.encode_onehot[col_name].get_vector(x),
                                             chunk[col_name][0:count].tolist())))
                else :
                    input_vector.append(np.array(list(map(lambda x: [self._filter_nan(x)], chunk[col_name][0:count].tolist()))))
            return self._flat_data(input_vector, len(chunk[col_name][0:count].tolist()))
        except Exception as e :
            raise Exception (e)
        finally:
            store.close()
Project: skp_edu_docker | Author: TensorMSA
def _nlp_parser(self, file_path, index):
        """
        parse nlp data
        :return:
        """
        try :
            store = pd.HDFStore(file_path)
            chunk = store.select('table1',
                                 start=index.start,
                                 stop=index.stop)
            count = index.stop - index.start
            if (self.encode_col in chunk):
                encode = self.encode_pad(self._preprocess(chunk[self.encode_col].values)[0:count],
                                         max_len=self.encode_len)
                return self._word_embed_data(self.embed_type, encode)
            else:
                warnings.warn("not exists column names requested !!")
                return [['#'] * self.encode_len]
        except Exception as e :
            raise Exception (e)
        finally:
            store.close()
Project: skp_edu_docker | Author: TensorMSA
def _convert_data_format(self, file_path, index):
        """

        :param obj:
        :param index:
        :return:
        """
        try :
            return_data = []
            store = pd.HDFStore(file_path)
            chunk = store.select('table1',
                               start=index.start,
                               stop=index.stop)

            for column in self.column_list :
                for line in self._preprocess(chunk[column].values)[index.start:index.stop] :
                    return_data = return_data + line
            return [return_data]
        except Exception as e :
            raise Exception (e)
        finally:
            store.close()
Project: catwalk | Author: dssg
def fake_metta(matrix_dict, metadata):
    """Stores matrix and metadata in a metta-data-like form

    Args:
    matrix_dict (dict) of form { columns: values }.
        Expects an entity_id to be present which it will use as the index
    metadata (dict). Any metadata that should be set

    Yields:
        tuple of filenames for matrix and metadata
    """
    matrix = pandas.DataFrame.from_dict(matrix_dict).set_index('entity_id')
    with tempfile.NamedTemporaryFile() as matrix_file:
        with tempfile.NamedTemporaryFile('w') as metadata_file:
            hdf = pandas.HDFStore(matrix_file.name)
            hdf.put('title', matrix, data_columns=True)
            matrix_file.seek(0)

            yaml.dump(metadata, metadata_file)
            metadata_file.seek(0)
            yield (matrix_file.name, metadata_file.name)
Project: catwalk | Author: dssg
def _read_hdf_from_buffer(self, buffer):
        with pandas.HDFStore(
                "data.h5",
                mode="r",
                driver="H5FD_CORE",
                driver_core_backing_store=0,
                driver_core_image=buffer.read()) as store:

            if len(store.keys()) > 1:
                raise Exception('Ambiguous matrix store. More than one dataframe in the hdf file.')

            try:
                return store["matrix"]

            except KeyError:
                print("The hdf file should contain one and only key, matrix.")
                return store[store.keys()[0]]
Project: paysage | Author: drckf
def test_grbm_reload():
    vis_layer = layers.BernoulliLayer(num_vis)
    hid_layer = layers.GaussianLayer(num_hid)
    # create some extrinsics
    grbm = model.Model([vis_layer, hid_layer])
    with tempfile.NamedTemporaryFile() as file:
        # save the model
        store = pandas.HDFStore(file.name, mode='w')
        grbm.save(store)
        store.close()
        # reload
        store = pandas.HDFStore(file.name, mode='r')
        grbm_reload = model.Model.from_saved(store)
        store.close()
    # check the two models are consistent
    vis_data = vis_layer.random((num_samples, num_vis))
    data_state = model_utils.State.from_visible(vis_data, grbm)
    dropout_scale = model_utils.State.dropout_rescale(grbm)
    vis_orig = grbm.deterministic_iteration(1, data_state, dropout_scale).units[0]
    vis_reload = grbm_reload.deterministic_iteration(1, data_state, dropout_scale).units[0]
    assert be.allclose(vis_orig, vis_reload)
Project: paysage | Author: drckf
def save(self, store: pandas.HDFStore) -> None:
        """
        Save a model to an open HDFStore.

        Notes:
            Performs an IO operation.

        Args:
            store (pandas.HDFStore)

        Returns:
            None

        """
        # save the config as an attribute
        config = self.get_config()
        store.put('model', pandas.DataFrame())
        store.get_storer('model').attrs.config = config
        # save the parameters
        for i in range(self.num_weights):
            key = os.path.join('weights', 'weights'+str(i))
            self.weights[i].save_params(store, key)
        for i in range(self.num_layers):
            key = os.path.join('layers', 'layers'+str(i))
            self.layers[i].save_params(store, key)
Project: paysage | Author: drckf
def save_params(self, store, key):
        """
        Save the parameters to a HDFStore.

        Notes:
            Performs an IO operation.

        Args:
            store (pandas.HDFStore): the writeable stream for the params.
            key (str): the path for the layer params.

        Returns:
            None

        """
        for i, ip in enumerate(self.params):
            df_params = pandas.DataFrame(be.to_numpy_array(ip))
            store.put(os.path.join(key, 'parameters', 'key'+str(i)), df_params)
Project: paysage | Author: drckf
def load_params(self, store, key):
        """
        Load the parameters from an HDFStore.

        Notes:
            Performs an IO operation.

        Args:
            store (pandas.HDFStore): the readable stream for the params.
            key (str): the path for the layer params.

        Returns:
            None

        """
        params = []
        for i, ip in enumerate(self.params):
            params.append(be.float_tensor(
                store.get(os.path.join(key, 'parameters', 'key'+str(i))).values  # .values replaces the removed DataFrame.as_matrix()
            ).squeeze()) # collapse trivial dimensions to a vector
        self.params = self.params.__class__(*params)
Project: rnnlab | Author: phueb
def get_trajs_mat(self, cols, traj):
        if traj == 'avg_probe_pp':
            with pd.HDFStore(self.pp_traj_df_path, mode='r') as store:
                df_traj = store.select('pp_traj_df', columns=cols)
                trajs_mat = df_traj.values.transpose()
        elif traj == 'avg_probe_ba':
            with pd.HDFStore(self.ba_traj_df_path, mode='r') as store:
                df_traj = store.select('ba_traj_df', columns=cols)
                trajs_mat = df_traj.values.transpose()
        elif 'cat_task' in traj:
            with pd.HDFStore(self.cat_task_traj_df_path, mode='r') as store:
                columns = [traj.replace('cat_task_', '') + '_fold{}'.format(i) for i in cols]
                df_traj = store.select('cat_task_traj_df', columns=columns)
                trajs_mat = df_traj.values.transpose()
        elif 'syn_task' in traj:
            with pd.HDFStore(self.syn_task_traj_df_path, mode='r') as store:
                columns = [traj.replace('syn_task_', '') + '_fold{}'.format(i) for i in cols]
                df_traj = store.select('syn_task_traj_df', columns=columns)
                trajs_mat = df_traj.values.transpose()
        else:
            raise AttributeError('rnnlab: Invalid argument passed to "traj".')
        return trajs_mat
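store.select(), used throughout this method, only works on stores written in 'table' format; a fixed-format key raises a TypeError. A minimal sketch of writing a table-format frame and selecting a column and row subset (file and key names are placeholders):

import pandas as pd

df = pd.DataFrame({'x': range(5), 'y': range(5, 10), 'z': range(10, 15)})

with pd.HDFStore('trajs.h5', mode='w') as store:
    store.put('traj_df', df, format='table')  # 'table' format enables select()

with pd.HDFStore('trajs.h5', mode='r') as store:
    subset = store.select('traj_df', columns=['x', 'z'], start=0, stop=3)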
Project: Ads-RecSys-Datasets | Author: Atomu2014
def __iter__(self, gen_type='train', batch_size=None, shuffle_block=False, random_sample=False, split_fields=False,
                 on_disk=True, squeeze_output=False, **kwargs):
        gen_type = gen_type.lower()

        if on_disk:
            print('on disk...')

            for hdf_X, hdf_y in self._files_iter_(gen_type=gen_type, shuffle_block=shuffle_block):
                # num_of_lines = pd.HDFStore(hdf_y, mode='r').get_storer('fixed').shape[0]

                X_all = pd.read_hdf(hdf_X, mode='r').values  # .values replaces the removed .as_matrix()
                y_all = pd.read_hdf(hdf_y, mode='r').values

                gen = self.generator(X_all, y_all, batch_size, shuffle=random_sample)
                for X, y in gen:
                    if split_fields:
                        X = np.split(X, self.max_length, axis=1)
                        for i in range(self.max_length):
                            X[i] -= self.feat_min[i]
                    if squeeze_output:
                        y = y.squeeze()
                    yield X, y
        else:
            print('not implemented')
Project: dragonboard_testbench | Author: cta-observatory
def read_offsets(offsets_file):
    offsets = np.zeros(
            shape=(8, 2, 4096, 40),
            dtype='f4')

    def name_to_channel_gain_id(name):
        _, channel, gain = name.split('_')
        channel = int(channel)
        gain_id = {'high': 0, 'low': 1}[gain]
        return channel, gain_id

    with pd.HDFStore(offsets_file) as st:
        for name in st.keys():
            channel, gain_id = name_to_channel_gain_id(name)
            df = st[name]
            df.sort_values(["cell", "sample"], inplace=True)
            offsets[channel, gain_id] = df["median"].values.reshape(-1, 40)

    return offsets
Project: scikit-dataaccess | Author: MITHaystack
def cacheData(self, data_specification):
        '''
        Cache Kepler data locally

        @param data_specification: List of kepler IDs
        '''

        kid_list = data_specification

        data_location = DataFetcher.getDataLocation('kepler')

        if data_location is None:
            data_location = os.path.join(os.path.expanduser('~'),'.skdaccess','kepler')
            os.makedirs(data_location, exist_ok=True)
            data_location = os.path.join(data_location, 'kepler_data.h5')
            DataFetcher.setDataLocation('kepler', data_location)

        store = pd.HDFStore(data_location)

        missing_kid_list = []
        for kid in kid_list:
            if 'kid_' + kid not in store:  # HDFStore membership accepts keys with or without the leading '/'
                missing_kid_list.append(kid)


        if len(missing_kid_list) > 0:
            print("Downloading data for " + str(len(missing_kid_list)) + " star(s)")
            missing_kid_data = self.downloadKeplerData(missing_kid_list)

            for kid,data in missing_kid_data.items():
                store.put('kid_' + kid, data)

        store.close()
Project: scikit-dataaccess | Author: MITHaystack
def output(self):
        ''' 
        Output kepler data wrapper

        @return DataWrapper
        '''
        kid_list = self.ap_paramList[0]()
        kid_list = [ str(kid).zfill(9) for kid in kid_list ]

        self.cacheData(kid_list)

        data_location = DataFetcher.getDataLocation('kepler')

        kid_data = dict()

        store = pd.HDFStore(data_location)

        for kid in kid_list:
            kid_data[kid] = store['kid_' + kid]
            # If downloaded using old skdaccess version
            # switch index
            if kid_data[kid].index.name == 'TIME':
                kid_data[kid]['TIME'] = kid_data[kid].index
                kid_data[kid].set_index('CADENCENO', inplace=True)


        store.close()                
        kid_data = OrderedDict(sorted(kid_data.items(), key=lambda t: t[0]))

        # If a list of quarters is specified, only select data in those quarters
        if self.quarter_list is not None:
            for kid in kid_list:
                kid_data[kid] = kid_data[kid][kid_data[kid]['QUARTER'].isin(self.quarter_list)]


        return TableWrapper(kid_data, default_columns = ['PDCSAP_FLUX'], default_error_columns = ['PDCSAP_FLUX_ERR'])
Project: scikit-dataaccess | Author: MITHaystack
def _rawData(self):
        ''' 
        Select data from sites within site radius to be returned without stabilization.
        ''' 
        storeName = self.meta_data
        keyList = self._validStations(storeName)
        if len(keyList) == 0:
            self._validInit = 0
        else:
            storeData_fn = DataFetcher.getDataLocation('pbo')
            if storeData_fn is None:
                print('Dataset not available')
                return None

            storeData = pd.HDFStore(storeData_fn)
            mdyratio = self._mdyratio

            smSet_all, smHdr_all = pbo_util.nostab_sys(storeName,storeData,[self._start_time,self._end_time],indx=keyList,mdyratio=mdyratio,
                                                       use_progress_bar = self.use_progress_bar)

            self._smSet_all = smSet_all
            self._smHdr_all = smHdr_all
            storeData.close()
            if len(self._smSet_all) == 0:
                self._validInit = 0
            else:
                self._validInit = 1
Project: scikit-dataaccess | Author: MITHaystack
def getAntennaLogs():
        '''
        Get antenna logs.

        @return dictionary of data frames containing antenna logs
        '''

        meta_data = DataFetcher.getStationMetadata()

        storeData_fn = DataFetcher.getDataLocation('pbo')
        if storeData_fn is None:
            print('Dataset not available')
            return None

        store = pd.HDFStore(storeData_fn, 'r')
        logs = store['/antenna_logs']

        antenna_dict = dict()

        # for label in meta_data.keys():
        #     try:
        #         antenna_dict[label] = store['/antenna_log_' + label]
        #     except:
        #         pass

        for label in meta_data.keys():
            if len(logs[logs['Station'] == label]) > 0:
                antenna_dict[label] = logs[logs['Station'] == label]['Date']

        store.close()

        return antenna_dict
Project: scikit-dataaccess | Author: MITHaystack
def getStationMetadata():
        '''
        Get station metadata

        @return data frame of station metadata
        '''
        store_location = data_util.getDataLocation('ngl_gps')
        store = pd.HDFStore(store_location, 'r')
        metadata = store['metadata']
        store.close()

        metadata.loc[:,'Lon'] = (metadata.loc[:,'Lon'] + 180) % 360 - 180  # wrap longitudes into [-180, 180), e.g. 250 -> -110

        return metadata
Project: PortfolioTimeSeriesAnalysis | Author: MizioAnd
def save_dataframe(self, df):
        with pd.HDFStore(''.join([TwoSigmaFinModTools._save_path, 'train_debug', self.timestamp, '.h5']), "w") as train:
            train.put("train_debug", df)
Project: PortfolioTimeSeriesAnalysis | Author: MizioAnd
def load_dataframe():
        dataframe_name = 'train_debug'

        # one-hot encoded
        # not one-hot
        # date_time = '20170613_19h09m40s'
        # date_time = '20170613_19h34m31s'
        # date_time = '20170614_00h07m32s'
        date_time = '20170619_11h47m22s'
        with pd.HDFStore(''.join([TwoSigmaFinModTools._save_path, dataframe_name, date_time, '.h5']), 'r') as train:
            return train.get(dataframe_name)
Project: pyrsss | Author: butala
def write_hdf(hdf_fname, df, key, header):
    """
    Output the contents of *df* and *header* to the HDF file
    *hdf_fname* under identifier *key*.
    """
    with PD.HDFStore(hdf_fname) as store:
        store.put(key, df)
        store.get_storer(key).attrs.header = header
    return hdf_fname
Project: pyrsss | Author: butala
def read_hdf(hdf_fname, key):
    """
    Read contents of HDF file *hdf_fname* associated with *key* and
    return a :class:`DataFrame`, header tuple.
    """
    if not os.path.isfile(hdf_fname):
        raise ValueError('file {} does not exist'.format(hdf_fname))
    with PD.HDFStore(hdf_fname) as store:
        df = store.get(key)
        header = store.get_storer(key).attrs.header
        return df, header
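A quick round trip through the write_hdf/read_hdf pair above (file name, key, and header contents are placeholders):

import pandas as pd

df = pd.DataFrame({'value': [1.0, 2.0]})
header = ('station_X', '2017-01-01')  # arbitrary metadata to ride along as an attribute

write_hdf('obs.h5', df, 'obs', header)
df2, header2 = read_hdf('obs.h5', 'obs')
assert header2 == header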
Project: feagen | Author: ianlini
def setUp(self):
        self.test_output_dir = mkdtemp(prefix="feagen_test_output_")
        pandas_hdf_path = join(self.test_output_dir, "pandas.h5")
        self.hdf_store = pd.HDFStore(pandas_hdf_path)
Project: feagen | Author: ianlini
def __init__(self, hdf_path):
        hdf_dir = os.path.dirname(hdf_path)
        if hdf_dir != '':
            mkdir_p(hdf_dir)
        self.hdf_store = pd.HDFStore(hdf_path)
Project: triage | Author: dssg
def _get_head_of_matrix(self):
        try:
            hdf = pandas.HDFStore(self.matrix_path)
            key = hdf.keys()[0]
            head_of_matrix = hdf.select(key, start=0, stop=1)
            head_of_matrix.set_index(self.metadata['indices'], inplace=True)
            self._head_of_matrix = head_of_matrix
        except pandas.errors.EmptyDataError:
            self._head_of_matrix = None
Project: triage | Author: dssg
def _write_hdf_to_buffer(self, df):
        with pandas.HDFStore(
                "data.h5",
                mode="w",
                driver="H5FD_CORE",
                driver_core_backing_store=0) as out:
            out["matrix"] = df
            return out._handle.get_file_image()
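Paired with _read_hdf_from_buffer shown earlier, this gives a disk-free round trip: the H5FD_CORE driver with the backing store disabled keeps the HDF5 image entirely in memory, and "data.h5" is only a label. A standalone sketch combining the two:

import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})

# Serialize to an in-memory HDF5 image; nothing is written to disk.
with pd.HDFStore("data.h5", mode="w", driver="H5FD_CORE",
                 driver_core_backing_store=0) as out:
    out["matrix"] = df
    image = out._handle.get_file_image()

# Deserialize from the image.
with pd.HDFStore("data.h5", mode="r", driver="H5FD_CORE",
                 driver_core_backing_store=0,
                 driver_core_image=image) as store:
    restored = store["matrix"]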
Project: johnson-county-ddj-public | Author: dssg
def export_data_table(self, table, end_date, label, feature_names):
        """ Save a data set as an HDF table for later reuse.

        :param table: the DataFrame to save
        :type table: pandas DataFrame
        :param end_date: end of labeling period
        :type end_date: a date format of some kind
        :param label: name of the column containing labels
        :type label: str
        :param feature_names: names of the columns containing features
        :type feature_names: list
        :return: the prefix of the HDF filename
        :rtype: str
        """
        if type(end_date) == np.datetime64:
            end_date = np.datetime_as_string(end_date,
                                             timezone = 'local')[:10]
        else:
            end_date = end_date.date().isoformat()  # Timestamp.to_datetime() was removed; .date() works directly on a Timestamp

        file_name = self.export_metadata(end_date, label, feature_names)
        file_path = '{0}/{1}.h5'.format(self.results_directory, file_name)

        if not os.path.exists(file_path):
            store = pd.HDFStore(file_path)
            store['df'] = table
            store.close()

        self.upload_file_to_s3('{0}.h5'.format(file_name), 'hdf_bucket_name',
                               file_path)

        print("uploaded hdf to s3")

        return(file_name)
Project: roleo | Author: tony-hong
def __init__(self, hdfstore, tablename):
        if isinstance(hdfstore, pd.HDFStore):
            self.store = hdfstore
        else:
            self.store = pd.HDFStore(hdfstore, "r")

        self.tablename = tablename
Project: roleo | Author: tony-hong
def __init__(self, store, tablename, vecnamecol, lengthcol):
        if isinstance(store, pd.HDFStore):
            self.store = store
        else:
            self.store = pd.HDFStore(store, "r")

        self.tablename = tablename
        self.vecnamecol = vecnamecol
        self.lengthcol = lengthcol
Project: roleo | Author: tony-hong
def __init__(self, storesdict):
        self.stores = {}
        for key in storesdict:
            if isinstance(storesdict[key], pd.HDFStore):
                self.stores[key] = storesdict[key]
            else:
                self.stores[key] = pd.HDFStore(storesdict[key], "r")

        self.indices = {}
Project: DGP | Author: DynamicGravitySystems
def load_data(self, uid: str, prefix: str):
        """
        Load data from a specified group (prefix) - gps or gravity, from the projects HDF5 store.
        :param str uid: Datafile Unique Identifier
        :param str prefix: Data type prefix [gps or gravity]
        :return:
        """
        with HDFStore(str(self.hdf_path)) as store:
            try:
                data = store.get('{}/{}'.format(prefix, uid))
            except KeyError:
                return None
            else:
                return data
Project: pyopen | Author: timodonnell
def load(args, filename):
        return pandas.HDFStore(filename)
Project: PythonTrading | Author: F2011B
def query_from_to(symbol, Start, today):
    store = pd.HDFStore(Constants.StockHDF)

    # HDFStore keys() entries carry a leading '/', so test membership on the store itself
    if symbol not in store:
        store.close()
        return None

    # The original snippet breaks off at a bare `store[]`; returning the
    # symbol's frame sliced between the two dates is a plausible completion.
    df = store[symbol]
    store.close()
    return df.loc[Start:today]
Project: PythonTrading | Author: F2011B
def get_taylor_table():
    store = pd.HDFStore(Constants.DatabaseTaylorCP)
    StoredDF = pd.DataFrame()
    for key in store.keys():

        DF = store[key].tail(1)
        DF['SymbolID'] = key
        StoredDF = pd.concat([StoredDF, DF[['SymbolID', 'MO', 'MLo', 'MHi','TaylorDay']]], axis=0)
    store.close()

    return StoredDF.to_html()
Project: PythonTrading | Author: F2011B
def calc_oz_series_pandas(symbol, numWeeksBack=20, averageTf='W'):
    timeFrameMap={'W':(1*numWeeksBack),
                  '3M':(numWeeksBack*15),
                  'Q':(numWeeksBack*15),
                  'M':(numWeeksBack*4)}
    print(Constants.StockHDF)
    store = pd.HDFStore(Constants.StockHDF)
    symbolKey = symbol + '_'+ averageTf

    today = datetime.datetime.now()  # - datetime.timedelta(days=1)
    day_of_week = today.weekday()
    weekStart = today - datetime.timedelta(days=day_of_week + 1)

    if symbolKey not in store:  # keys() entries carry a leading '/', so test the store directly
        print('Symbol:'+symbol)
        weekly_DF = getWeeklyDF(timeFrameMap[averageTf], symbol)
        #print(weekly_DF)
        newDF=calc_OZ_pandas(weekly_DF,averageTf=averageTf)
        store[symbolKey] = newDF
        store.flush()
        #print('READ')


    lenStore = len(store[symbolKey]) - 1
    if not (store[symbolKey].index[lenStore].date() == weekStart.date()):
        weekly_DF = getWeeklyDF(timeFrameMap[averageTf], symbol)
        newDF = calc_OZ_pandas(weekly_DF, averageTf=averageTf)
        store[symbolKey] = newDF
        store.flush()

    result = store[symbolKey]
    store.close()  # release the file handle so later opens do not hit a locked store
    return result