Python pandas 模块,DataFrame() 实例源码

我们从 Python 开源项目中提取了以下 16 个代码示例,用于说明如何使用 pandas.DataFrame()。

项目:prophet    作者:facebook    | 项目源码 | 文件源码
def add_group_component(self, components, name, group):
        """Add a component named ``name`` that contains every component in
        ``group``.

        Parameters
        ----------
        components: pd.DataFrame with a 'component' column.
        name: Name of new group component.
        group: Iterable of component names that form the group.

        Returns
        -------
        pd.DataFrame holding the original rows followed by a copy of the rows
        whose 'component' is in group, relabelled as name. Original index
        labels are kept, so they may repeat.
        """
        new_comp = components[components['component'].isin(set(group))].copy()
        new_comp['component'] = name
        # DataFrame.append was removed in pandas 2.0; pd.concat performs the
        # same row-wise stacking and works on old and new pandas alike.
        return pd.concat([components, new_comp])
项目:prophet    作者:facebook    | 项目源码 | 文件源码
def predictive_samples(self, df):
        """Draw samples from the posterior predictive distribution.

        The input is copied and passed through self.setup_dataframe before
        sampling, so the caller's dataframe is not handed to the sampler
        directly.

        Parameters
        ----------
        df: Dataframe with dates for predictions (column ds), and capacity
            (column cap) if logistic growth.

        Returns
        -------
        The result of self.sample_posterior_predictive: a dictionary with
        keys "trend", "seasonal", and "yhat" containing posterior predictive
        samples for that component. "seasonal" is the sum of seasonalities,
        holidays, and added regressors.
        """
        prepared = self.setup_dataframe(df.copy())
        return self.sample_posterior_predictive(prepared)
项目:jdata    作者:learn2Pro    | 项目源码 | 文件源码
def get_actions(start_date, end_date):
    """Return the combined actions with start_date <= time < end_date.

    The result is cached as a pickle under ./cache/, keyed by the two
    dates. If that file already exists it is loaded instead of rebuilding
    the frame from get_actions_1/2/3.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        # NOTE(security): pickle.load can execute arbitrary code; this is only
        # acceptable because the cache file is written by this function.
        # Pickles are binary: open in 'rb' and close the handle deterministically.
        with open(dump_path, 'rb') as cache_file:
            actions = pickle.load(cache_file)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        with open(dump_path, 'wb') as cache_file:
            pickle.dump(actions, cache_file)
    return actions
项目:JData    作者:Xls1994    | 项目源码 | 文件源码
def get_actions(start_date, end_date):
    """Return the combined actions with start_date <= time < end_date.

    The result is cached as a CSV under ./cache/, keyed by the two dates.
    If that file already exists it is read back instead of rebuilding the
    frame from get_actions_1/2/3.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.csv' % (start_date, end_date)
    if os.path.exists(dump_path):
        actions = pd.read_csv(dump_path)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])  # type: pd.DataFrame
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        actions.to_csv(dump_path, index=False)
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3; the bare print statement is a syntax error on Python 3.
    print('action combination finish...')
    return actions
项目:JData    作者:edvardHua    | 项目源码 | 文件源码
def get_actions(start_date, end_date):
    """Return the combined actions with start_date <= time < end_date.

    The result is cached as a pickle under ./cache/, keyed by the two
    dates. If that file already exists it is loaded instead of rebuilding
    the frame from get_actions_1/2/3.

    :param start_date: inclusive lower bound compared against ``actions.time``
    :param end_date: exclusive upper bound compared against ``actions.time``
    :return: actions: pd.DataFrame
    """
    dump_path = './cache/all_action_%s_%s.pkl' % (start_date, end_date)
    if os.path.exists(dump_path):
        # NOTE(security): pickle.load can execute arbitrary code; this is only
        # acceptable because the cache file is written by this function.
        # Pickles are binary: open in 'rb' and close the handle deterministically.
        with open(dump_path, 'rb') as cache_file:
            actions = pickle.load(cache_file)
    else:
        action_1 = get_actions_1()
        action_2 = get_actions_2()
        action_3 = get_actions_3()
        actions = pd.concat([action_1, action_2, action_3])
        actions = actions[(actions.time >= start_date) & (actions.time < end_date)]
        with open(dump_path, 'wb') as cache_file:
            pickle.dump(actions, cache_file)
    return actions
项目:WNTR    作者:USEPA    | 项目源码 | 文件源码
def sample_damage_state(self, Pr):
        """
        Pick a damage state for every element by comparing one uniform
        random draw per element with the exceedance probabilities.

        Parameters
        -----------
        Pr : pd.DataFrame
            Probability of exceeding a damage state (rows are elements,
            columns are damage states)

        Returns
        -------
        damage_state : pd.Series
            The damage state of each element, indexed like Pr. Entries stay
            None where the draw is not below any column's probability; when
            several columns qualify, the last one in Pr.columns wins.
        """
        n_elements = Pr.shape[0]
        draws = pd.Series(data=np.random.uniform(size=n_elements),
                          index=Pr.index)
        damage_state = pd.Series(data=[None] * n_elements, index=Pr.index)

        # Columns are visited in order, so a later state overwrites an
        # earlier one wherever its probability also exceeds the draw.
        for state_name in Pr.columns:
            exceeded = draws < Pr[state_name]
            damage_state[exceeded] = state_name

        return damage_state
项目:pastas    作者:pastas    | 项目源码 | 文件源码
def get_stress(self, p=None, tindex=None):
        """Return the stress or stresses stored on this time series object.

        If the time series object has multiple stresses each column
        represents a stress.

        Parameters
        ----------
        p: optional
            Accepted but not used by this method.
        tindex: optional
            When given, used to index self.stress; when None the full
            self.stress is returned.

        Returns
        -------
        stress: pd.DataFrame
            Pandas dataframe of the stress(es)

        """
        if tindex is None:
            return self.stress
        return self.stress[tindex]
项目:prophet    作者:facebook    | 项目源码 | 文件源码
def predict(self, df=None):
        """Produce a forecast with the prophet model.

        Parameters
        ----------
        df: pd.DataFrame with dates for predictions (column ds), and capacity
            (column cap) if logistic growth. If not provided, predictions are
            made on a copy of self.history.

        Returns
        -------
        A pd.DataFrame with the forecast components: ds, trend, optionally
        cap and floor, the uncertainty intervals, the seasonal components,
        and yhat (trend + seasonal).

        Raises
        ------
        ValueError if df is given but has no rows.
        """
        if df is not None:
            if df.shape[0] == 0:
                raise ValueError('Dataframe has no rows.')
            df = self.setup_dataframe(df.copy())
        else:
            df = self.history.copy()

        df['trend'] = self.predict_trend(df)
        seasonal_components = self.predict_seasonal_components(df)
        intervals = self.predict_uncertainty(df)

        # Only ds, trend and (when applicable) cap / floor are carried over
        # from the working dataframe.
        keep = ['ds', 'trend']
        if 'cap' in df:
            keep.append('cap')
        if self.logistic_floor:
            keep.append('floor')

        # Put the kept columns, intervals and seasonal components side by side.
        forecast = pd.concat(
            (df[keep], intervals, seasonal_components), axis=1)
        forecast['yhat'] = forecast['trend'] + forecast['seasonal']
        return forecast
项目:prophet    作者:facebook    | 项目源码 | 文件源码
def sample_model(self, df, seasonal_features, iteration):
        """Simulate observations from the extrapolated generative model.

        Parameters
        ----------
        df: Prediction dataframe.
        seasonal_features: pd.DataFrame of seasonal features.
        iteration: Int sampling iteration to use parameters from.

        Returns
        -------
        Dataframe with trend, seasonality, and yhat, each like df['t'].
        """
        trend = self.sample_predictive_trend(df, iteration)

        beta = self.params['beta'][iteration]
        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
        # same ndarray and exists in every pandas version.
        seasonal = np.matmul(seasonal_features.values, beta) * self.y_scale

        # One Gaussian noise draw per row of df, scaled back to data units.
        sigma = self.params['sigma_obs'][iteration]
        noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale

        return pd.DataFrame({
            'yhat': trend + seasonal + noise,
            'trend': trend,
            'seasonal': seasonal,
        })
项目:prophet    作者:facebook    | 项目源码 | 文件源码
def make_future_dataframe(self, periods, freq='D', include_history=True):
        """Build a dataframe of dates on which to make predictions.

        Parameters
        ----------
        periods: Int number of periods to forecast forward.
        freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
        include_history: Boolean to include the historical dates in the data
            frame for predictions.

        Returns
        -------
        pd.DataFrame with a single column ds that extends forward from the
        end of self.history_dates for the requested number of periods,
        preceded by the historical dates when include_history is true.
        """
        last_date = self.history_dates.max()
        # Ask for one period more than needed: the generated range may begin
        # on last_date itself, and that entry is filtered out below.
        candidates = pd.date_range(
            start=last_date, periods=periods + 1, freq=freq)
        # Keep strictly-later dates, then trim to the requested count.
        future = candidates[candidates > last_date][:periods]

        if not include_history:
            return pd.DataFrame({'ds': future})

        combined = np.concatenate((np.array(self.history_dates), future))
        return pd.DataFrame({'ds': combined})
项目:JDcontest    作者:zsyandjyhouse    | 项目源码 | 文件源码
def get_actions(start_time, end_time):
    """
    Read the first 100000 rows of the action log and keep the rows whose
    ``time`` lies in the closed interval [start_time, end_time].

    :param start_time: inclusive lower bound on the ``time`` column
    :param end_time: inclusive upper bound on the ``time`` column
    :return: actions: pd.DataFrame
    """
    data_dir = "../JData/"
    # Alternative input kept from the original: "JData_Action_before_327.csv"
    action_file = "JData_Action_All.csv"
    action_all = pd.read_csv(data_dir + action_file, nrows=100000)
    # Parse the timestamps so they can be compared against the bounds.
    action_all['time'] = pd.to_datetime(
        action_all['time'], format='%Y-%m-%d %H:%M:%S')
    not_before_start = action_all.time >= start_time
    not_after_end = action_all.time <= end_time
    return action_all[not_before_start & not_after_end]
项目:WNTR    作者:USEPA    | 项目源码 | 文件源码
def cdf_probability(self, x):
        """
        Return the CDF probability for each state, based on the value of x

        Parameters
        -----------
        x : pd.Series
            Control variable for each element

        Returns
        --------
        Pr : pd.DataFrame
            Probability of exceeding a damage state; one row per entry of
            x.index and one column per state name from self.states()

        """
        # self.states() does not depend on the element, so evaluate it once
        # instead of once per row.
        states = list(self.states())
        state_names = [name for name, state in states]

        Pr = pd.DataFrame(index=x.index, columns=state_names)

        for element in Pr.index:
            for state_name, state in states:
                try:
                    dist = state.distribution[element]
                except LookupError:
                    # No element-specific distribution: use the 'Default'
                    # entry. Only lookup failures are caught so unrelated
                    # errors are no longer silently masked.
                    dist = state.distribution['Default']
                Pr.loc[element, state_name] = dist.cdf(x[element])

        return Pr
项目:SimpleSQLite    作者:thombashi    | 项目源码 | 文件源码
def select_as_dataframe(
            self, table_name, column_list=None, where=None, extra=None):
        """
        Fetch rows from the database and hand them back as a
        :py:class:`pandas.DataFrame` instance.

        When ``column_list`` is omitted, the column names are taken from
        ``self.get_attr_name_list(table_name)``. If the underlying select
        yields ``None``, an empty frame is returned.

        :param str table_name: |arg_select_table_name|
        :param list column_list: |arg_select_as_xx_column_list|
        :param str where: |arg_select_where|
        :param str extra: |arg_select_extra|
        :return: Table data as a :py:class:`pandas.DataFrame` instance.
        :rtype: pandas.DataFrame
        :raises simplesqlite.NullDatabaseConnectionError:
            |raises_check_connection|
        :raises simplesqlite.TableNotFoundError:
            |raises_verify_table_existence|
        :raises simplesqlite.OperationalError: |raises_operational_error|

        :Example:
            :ref:`example-select-as-dataframe`

        .. note::
            ``pandas`` package required to execute this method.
        """

        # Imported here so that pandas is only needed when this method runs.
        import pandas

        if column_list is None:
            column_list = self.get_attr_name_list(table_name)

        select_clause = ",".join(SqlQuery.to_attr_str_list(column_list))
        cursor = self.select(
            select=select_clause,
            table_name=table_name, where=where, extra=extra)

        if cursor is not None:
            return pandas.DataFrame(cursor.fetchall(), columns=column_list)

        return pandas.DataFrame()
项目:Eskapade    作者:KaveIO    | 项目源码 | 文件源码
def construct_empty_hist(self, columns):
        """Create an (empty) histogram of right type

        Create a multi-dim histogram by iterating through the columns in
        reverse order and passing a single-dim hist as input to the next
        column. The innermost value is an ``hg.Count``; each column wraps the
        histogram built so far in an ``hg.SparselyBin`` (numeric and
        timestamp dtypes) or an ``hg.Categorize`` (everything else).

        Before returning, the attributes ``datatype``, ``n_dim`` and
        ``n_bins`` are set on the resulting histogram object.

        :param list columns: histogram columns; each must be a key of
            ``self.var_dtype``
        :returns: created histogram (a bare ``hg.Count`` when ``columns`` is
            empty, otherwise the outermost wrapper)
        :rtype: histogrammar.Count
        """

        # innermost histogram: a plain counter
        hist = hg.Count()

        # create a multi-dim histogram by iterating through the columns in reverse order
        # and passing a single-dim hist as input to the next column
        for col in reversed(columns):
            # histogram type depends on the data type
            dt = np.dtype(self.var_dtype[col])

            # processing function, e.g. only accept booleans during filling;
            # a per-column entry in self.quantity takes precedence over the
            # dtype-based default from hf.QUANTITY
            f = self.quantity[col] if col in self.quantity else hf.QUANTITY[dt.type]
            # f and col are bound as lambda default arguments so that each
            # lambda keeps the values of its own iteration rather than the
            # loop variables' final values
            if len(columns) == 1:
                # df[col] is a pd.Series
                quant = lambda x, fnc=f: fnc(x)
            else:
                # df[columns] is a pd.DataFrame
                # fix column to col
                quant = lambda x, fnc=f, clm=col: fnc(x[clm])

            # classify the dtype by instantiating a default scalar of its type
            is_number = isinstance(dt.type(), np.number)
            is_timestamp = isinstance(dt.type(), np.datetime64)

            if is_number or is_timestamp:
                # numbers and timestamps are put in a sparse binned histogram;
                # the bin specs come from self.bin_specs when present for this
                # column, otherwise from the unit number / timestamp defaults
                bs = self.bin_specs.get(col, self._unit_bin_specs if is_number else self._unit_timestamp_specs)
                hist = hg.SparselyBin(binWidth=bs['bin_width'], origin=bs['bin_offset'], quantity=quant, value=hist)
            else:
                # strings and booleans are treated as categories
                hist = hg.Categorize(quantity=quant, value=hist)

        # FIXME stick data types and number of dimension to histogram
        dta = [self.var_dtype[col] for col in columns]
        # a single column stores its dtype directly, several store the list
        hist.datatype = dta[0] if len(columns) == 1 else dta
        hist.n_dim = len(columns)

        # Intended accessor for the number of bins: returns the object's
        # `num` attribute if it has one, else `size`, else raises.
        # NOTE(review): the property object is assigned to the histogram
        # *instance* below; Python only invokes property descriptors that
        # live on the class, so `hist.n_bins` presumably evaluates to the
        # property object itself rather than a number -- confirm intent.
        @property
        def n_bins(self):
            if hasattr(self, 'num'):
                return self.num
            elif hasattr(self, 'size'):
                return self.size
            else:
                raise RuntimeError('Cannot retrieve number of bins from hgr hist')
        hist.n_bins = n_bins

        return hist
项目:prophet    作者:facebook    | 项目源码 | 文件源码
def make_all_seasonality_features(self, df):
        """Assemble the full regression feature matrix.

        Combines seasonality features, holiday features, and added
        regressors, in that order. If none of them contributes anything, a
        single all-zero placeholder column named 'zeros' is used with a
        prior scale of 1.

        Parameters
        ----------
        df: pd.DataFrame with dates for computing seasonality features and any
            added regressors.

        Returns
        -------
        pd.DataFrame with regression features.
        list of prior scales for each column of the features dataframe.
        """
        feature_frames = []
        prior_scales = []

        # Seasonalities: every generated column shares the seasonality's
        # prior scale.
        for name, props in self.seasonalities.items():
            block = self.make_seasonality_features(
                df['ds'], props['period'], props['fourier_order'], name)
            feature_frames.append(block)
            prior_scales += [props['prior_scale']] * block.shape[1]

        # Holidays: the helper returns the features with their prior scales.
        if self.holidays is not None:
            holiday_block, holiday_priors = self.make_holiday_features(
                df['ds'])
            feature_frames.append(holiday_block)
            prior_scales.extend(holiday_priors)

        # Extra regressors: one column, taken as-is from df, per regressor.
        for name, props in self.extra_regressors.items():
            feature_frames.append(pd.DataFrame(df[name]))
            prior_scales.append(props['prior_scale'])

        if not feature_frames:
            placeholder = pd.DataFrame({'zeros': np.zeros(df.shape[0])})
            feature_frames.append(placeholder)
            prior_scales.append(1.)

        return pd.concat(feature_frames, axis=1), prior_scales
项目:prophet    作者:facebook    | 项目源码 | 文件源码
def predict_seasonal_components(self, df):
        """Predict seasonality components, holidays, and added regressors.

        For every component (and for the 'seasonal', 'seasonalities',
        'holidays' and 'extra_regressors' groups) the contribution is computed
        for each row of self.params['beta'] and summarised by its mean and
        the lower/upper percentiles implied by self.interval_width.

        Parameters
        ----------
        df: Prediction dataframe.

        Returns
        -------
        Dataframe with seasonal components: for each component name columns
        <name>, <name>_lower and <name>_upper.
        """
        seasonal_features, _ = self.make_all_seasonality_features(df)
        lower_p = 100 * (1.0 - self.interval_width) / 2
        upper_p = 100 * (1.0 + self.interval_width) / 2

        # Map each feature column index to the component it belongs to
        # (the part of the column name before '_delim_').
        components = pd.DataFrame({
            'col': np.arange(seasonal_features.shape[1]),
            'component': [x.split('_delim_')[0] for x in seasonal_features.columns],
        })
        # Add total for all regression components.
        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
        # rows the same way.
        components = pd.concat([components, pd.DataFrame({
            'col': np.arange(seasonal_features.shape[1]),
            'component': 'seasonal',
        })])
        # Add totals for seasonality, holiday, and extra regressors
        components = self.add_group_component(
            components, 'seasonalities', self.seasonalities.keys())
        if self.holidays is not None:
            components = self.add_group_component(
                components, 'holidays', self.holidays['holiday'].unique())
        components = self.add_group_component(
            components, 'extra_regressors', self.extra_regressors.keys())
        # Remove the placeholder
        components = components[components['component'] != 'zeros']

        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same ndarray.
        X = seasonal_features.values
        data = {}
        for component, features in components.groupby('component'):
            cols = features.col.tolist()
            comp_beta = self.params['beta'][:, cols]
            comp_features = X[:, cols]
            # One column per row of beta: (rows of df) x (parameter samples).
            comp = (
                np.matmul(comp_features, comp_beta.transpose())
                * self.y_scale  # noqa W503
            )
            data[component] = np.nanmean(comp, axis=1)
            data[component + '_lower'] = np.nanpercentile(comp, lower_p,
                                                            axis=1)
            data[component + '_upper'] = np.nanpercentile(comp, upper_p,
                                                            axis=1)
        return pd.DataFrame(data)