Python dill module: dump() example source code

We extracted the following 50 code examples from open-source Python projects to illustrate how to use dill.dump().
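As a quick orientation before the project samples: dill.dump(obj, file) writes an object to an open binary file, and dill.load(file) reads it back. Here is a minimal round-trip sketch (the file name state.dill and the payload object are illustrative, not taken from any of the projects below):

import dill

# dill, unlike the standard pickle module, can serialize lambdas
payload = {'func': lambda x: x + 1, 'data': [1, 2, 3]}
with open('state.dill', 'wb') as f:
    dill.dump(payload, f)
with open('state.dill', 'rb') as f:
    restored = dill.load(f)
assert restored['data'] == [1, 2, 3]
assert restored['func'](41) == 42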

Project: marseille    Author: vene
def saga_cv_cache(*args):

    arghash = sha1(repr(args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)

    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_cv(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)

    return out
Project: pysc2-examples    Author: chris-chris
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
      U.save_state(os.path.join(td, "model"))
      arc_name = os.path.join(td, "packed.zip")
      with zipfile.ZipFile(arc_name, 'w') as zipf:
        for root, dirs, files in os.walk(td):
          for fname in files:
            file_path = os.path.join(root, fname)
            if file_path != arc_name:
              zipf.write(file_path,
                         os.path.relpath(file_path, td))
      with open(arc_name, "rb") as f:
        model_data = f.read()
    with open(path, "wb") as f:
      dill.dump(model_data, f)
Project: RD-MCL    Author: biologyguy
def test_chain_dump_obj():
    walker1 = SimpleNamespace(_dump_obj=lambda *_: "walker1")
    walker2 = SimpleNamespace(_dump_obj=lambda *_: "walker2")
    tmp_file = br.TempFile()
    tmp_file.write("outfile results")

    chain = SimpleNamespace(walkers=[walker1, walker2], outfile=tmp_file.path, cold_heat=0.1, hot_heat=0.2,
                            step_counter=20, best_score_ever_seen=100, _dump_obj=mcmcmc._Chain._dump_obj)

    dump = chain._dump_obj(chain)
    assert dump["walkers"] == ["walker1", "walker2"]
    assert dump["cold_heat"] == 0.1
    assert dump["hot_heat"] == 0.2
    assert dump["step_count"] == 20
    assert dump["best_score"] == 100
    assert dump["results"] == "outfile results"
Project: RD-MCL    Author: biologyguy
def test_chain_apply_dump(capsys):
    walker1 = SimpleNamespace(_apply_dump=lambda *_: print("Applying dump to walker1"))
    walker2 = SimpleNamespace(_apply_dump=lambda *_: print("Applying dump to walker2"))

    tmp_file = br.TempFile()
    chain = SimpleNamespace(walkers=[walker1, walker2], outfile=tmp_file.path, cold_heat=None, hot_heat=None,
                            step_counter=None, best_score_ever_seen=None, _apply_dump=mcmcmc._Chain._apply_dump)

    var_dict = {"walkers": [None, None], "cold_heat": 0.1, "hot_heat": 0.2,
                "step_count": 20, "best_score": 100, "results": "Some results"}
    chain._apply_dump(chain, var_dict)
    assert chain.walkers == [walker1, walker2]
    out, err = capsys.readouterr()
    assert out == "Applying dump to walker1\nApplying dump to walker2\n"
    assert chain.cold_heat == 0.1
    assert chain.hot_heat == 0.2
    assert chain.step_counter == 20
    assert chain.best_score_ever_seen == 100
    assert tmp_file.read() == "Some results"
Project: RD-MCL    Author: biologyguy
def test_mcmcmc_resume(capsys):
    mc_obj = SimpleNamespace(dumpfile="does_not_exist", resume=mcmcmc.MCMCMC.resume)
    assert mc_obj.resume(mc_obj) is False

    tmp_file = br.TempFile(byte_mode=True)
    dill.dump(["a", "b", "c"], tmp_file)

    mc_obj.dumpfile = tmp_file.path
    chain1 = SimpleNamespace(_apply_dump=lambda *_: print("applying chain1"))
    chain2 = SimpleNamespace(_apply_dump=lambda *_: print("applying chain2"))
    chain3 = SimpleNamespace(_apply_dump=lambda *_: print("applying chain3"))
    mc_obj.chains = [chain1, chain2, chain3]
    mc_obj.run = lambda *_: print("Running")

    assert mc_obj.resume(mc_obj) is True
    out, err = capsys.readouterr()
    assert out == "applying chain1\napplying chain2\napplying chain3\nRunning\n", print(out)
Project: chemworkflows    Author: avirshup
def run_preprocessing(runner, outdir):
    t = runner.preprocess()

    print('\nFINISHED preprocessing. Output directory:')
    print("    ", os.path.abspath(outdir))

    resultjson = {}
    for field in t.outputfields:
        if field == 'pdbstring':
            with open(os.path.join(outdir, 'prep.pdb'), 'w') as outfile:
                print(t.getoutput('pdbstring'), file=outfile)
        else:
            resultjson[field] = t.getoutput(field)

    with open(os.path.join(outdir, 'prep.json'), 'w') as outfile:
        json.dump(resultjson, outfile)
    # dill pickles are binary: open the state file in 'wb', not 'w'
    with open(os.path.join(outdir, 'workflow_state.dill'), 'wb') as outfile:
        dill.dump(runner, outfile)
Project: ip6words    Author: lstn
def dill_words(num_words, fname="words.dill"):
    fname = os.path.join(os.path.dirname(os.path.realpath(__file__)), fname)
    try:
        if os.path.isfile(fname):
            words = dill.load(open(fname, "rb"))
            if(len(words) < ip_handling.iutils.get_ipv6_word_possibilities()):
                os.remove(fname)
                raise Exception # go into except block to reload words
            return words
        else:
            words = load_words(num_words)
            if(len(words) < ip_handling.iutils.get_ipv6_word_possibilities()):
                raise Exception # go into except block to reload words
            dill.dump(words, open(fname, "wb"))
            return words
    except:
        try:
            words = load_words(num_words)
            if(len(words) < ip_handling.iutils.get_ipv6_word_possibilities()):
                raise Exception # go into except block to reload words
            dill.dump(words, open(fname, "wb"))
            return words
        except:
            return load_words(num_words)
Project: learning-to-learn    Author: deepmind
def save(network, sess, filename=None):
  """Save the variables contained by a network to disk."""
  to_save = collections.defaultdict(dict)
  variables = snt.get_variables_in_module(network)

  for v in variables:
    split = v.name.split(":")[0].split("/")
    module_name = split[-2]
    variable_name = split[-1]
    to_save[module_name][variable_name] = v.eval(sess)

  if filename:
    with open(filename, "wb") as f:
      pickle.dump(to_save, f)

  return to_save
Project: easytrader    Author: yuzhucu
def main(prepare, use, do, get, params, debug):
    if get is not None:
        do = get
    if prepare is not None and use in ['ht', 'yjb', 'yh', 'gf', 'xq']:
        user = easytrader.use(use, debug)
        user.prepare(prepare)
        with open(ACCOUNT_OBJECT_FILE, 'wb') as f:
            dill.dump(user, f)
    if do is not None:
        with open(ACCOUNT_OBJECT_FILE, 'rb') as f:
            user = dill.load(f)

        if len(params) > 0:
            result = getattr(user, do)(*params)
        else:
            result = getattr(user, do)

        json_result = json.dumps(result, indent=4, ensure_ascii=False, sort_keys=True)
        click.echo(json_result)
Project: Quadflor    Author: quadflor
def persist(self, X, y, thesaurus):
        """
        Save the data and the processed thesaurus.

        Parameters
        ----------
        X: sparse matrix
            The training data; will be compressed.
        y: sparse matrix
            The label data; will be compressed.
        thesaurus: ThesaurusReader
            ThesaurusReader object; will be pickled.
        """
        print('Persisting features to disk')
        self._delete_old_files()
        self._save(self._persist_name('X'), X)
        self._save(self._persist_name('y'), y)
        with open(self._persist_name('TR'), mode='wb') as f:
            pickle.dump(thesaurus, f)
Project: tf-tutorial    Author: zchen0211
def save(network, sess, filename=None):
  """Save the variables contained by a network to disk."""
  to_save = collections.defaultdict(dict)
  variables = nn.get_variables_in_module(network)

  for v in variables:
    split = v.name.split(":")[0].split("/")
    module_name = split[-2]
    variable_name = split[-1]
    to_save[module_name][variable_name] = v.eval(sess)

  if filename:
    with open(filename, "wb") as f:
      pickle.dump(to_save, f)

  return to_save
Project: shapenet    Author: pannous
def restore(file_name="dump.bin"):
    with open(file_name, 'rb') as f:
        return pickle.load(f)
Project: acl2017    Author: tttthomasssss
def save_vector_cache(vectors, vector_out_file, filetype='', **kwargs):
    logging.info("Saving {} vectors to cache {}".format(len(vectors),vector_out_file))
    if (vector_out_file.endswith('.dill') or filetype == 'dill'):
        with open(vector_out_file, 'wb') as data_file:
            dill.dump(vectors, data_file, protocol=kwargs.get('dill_protocol', 3))
    elif (vector_out_file.endswith('.joblib') or filetype == 'joblib'):
        joblib.dump(vectors, vector_out_file, compress=kwargs.get('joblib_compression', 3),
                    protocol=kwargs.get('joblib_protocol', 3))
    elif (vector_out_file.endswith('.sqlite') or filetype == 'sqlite'):
        autocommit = kwargs.pop('autocommit', True)
        if (isinstance(vectors, SqliteDict)):
            vectors.commit()
        else:
            with SqliteDict(vector_out_file, autocommit=autocommit) as data_file:
                for key, value in vectors.items():
                    data_file[key] = value

                if (not autocommit):
                    data_file.commit()
    else:
        raise NotImplementedError
Project: distributional_perspective_on_RL    Author: Kiwoo
def save(self, path):
        """Save model to a pickle located at `path`"""
        with tempfile.TemporaryDirectory() as td:
            U.save_state(os.path.join(td, "model"))
            arc_name = os.path.join(td, "packed.zip")
            with zipfile.ZipFile(arc_name, 'w') as zipf:
                for root, dirs, files in os.walk(td):
                    for fname in files:
                        file_path = os.path.join(root, fname)
                        if file_path != arc_name:
                            zipf.write(file_path, os.path.relpath(file_path, td))
            with open(arc_name, "rb") as f:
                model_data = f.read()
        with open(path, "wb") as f:
            dill.dump((model_data, self._act_params), f)
Project: kaggle    Author: RankingAI
def __MergeData(cls, InputDir, OutputDir, mode):
        """"""
        if(mode == 'train'):
            ActionDataFile = '%s/train_2016_v2.csv' % InputDir
            OutputFile = '%s/train.pkl' % OutputDir
        else:
            ActionDataFile = '%s/sample_submission.csv' % InputDir
            OutputFile = '%s/test.pkl' % OutputDir

        print(OutputFile)

        PropertyDataFile = '%s/properties_2016.csv' % InputDir

        ## load
        ActionData = pd.read_csv(ActionDataFile, parse_dates=['transactiondate'])
        PropertyData = pd.read_csv(PropertyDataFile)

        ## left join
        MergedData = ActionData.merge(PropertyData, how='left', on='parcelid')

        ## output into pkl file
        if (os.path.exists(OutputDir) == False):
            os.makedirs(OutputDir)
        with open(OutputFile, 'wb') as o_file:
            pickle.dump(MergedData, o_file, -1)

        return

    ## split rawcensustractandblock into census, tract and block
Project: kaggle    Author: RankingAI
def __ParseCTB(cls, InputDir, OutputDir, mode):
        """"""
        if(mode == 'train'):
            InputFile = '%s/train.pkl' % InputDir
            OutputFile = '%s/train.pkl' % OutputDir
        else:
            InputFile = '%s/test.pkl' % InputDir
            OutputFile = '%s/test.pkl' % OutputDir

        ## load
        with open(InputFile, 'rb') as i_file:
            df_data = pickle.load(i_file)

        ## extract census, tract and block identifies
        df_data['rawcensustractandblock'] = (df_data['rawcensustractandblock'] * 1000).astype(np.float64).astype(np.int64)
        df_data['fipsid'] = ((df_data['rawcensustractandblock'] / 10000000).astype(np.int64)).astype(str)
        df_data['tractandblock'] = df_data['rawcensustractandblock'] % 10000000
        df_data['tractid'] = ((df_data['tractandblock'] / 10).astype(np.int64)).astype(str)
        df_data['blockid'] = ((df_data['tractandblock'] % 10).astype(np.int64)).astype(str)
        df_data.drop(['fips', 'rawcensustractandblock', 'tractandblock'], axis = 1, inplace = True)

        ## output into pkl file
        if (os.path.exists(OutputDir) == False):
            os.makedirs(OutputDir)
        with open(OutputFile, 'wb') as o_file:
            pickle.dump(df_data, o_file, -1)

        return
Project: kaggle    Author: RankingAI
def __SplitData(cls, InputDir, OutputDir, mode):
        """"""
        if(mode == 'train'):
            InputFileData = '%s/train.pkl' % InputDir
        else:
            InputFileData = '%s/test.pkl' % InputDir

        InputFileFeatMap = '%s/featmap.pkl' % InputDir

        ## load
        with open(InputFileData, 'rb') as i_file:
            df_data = pickle.load(i_file)
        with open(InputFileFeatMap, 'rb') as i_file:
            d_feat = pickle.load(i_file)

        if (os.path.exists(OutputDir) == False):
            os.makedirs(OutputDir)
        with open('%s/featmap.pkl' % OutputDir, 'wb') as o_file:
            pickle.dump(d_feat, o_file, -1)

        ## output into individual pkl files
        for i in range(12):
            month = i + 1
            df_MonthData = df_data[(df_data['transactiondate'].dt.month == month)]
            with open('%s/%s.pkl'% (OutputDir, month), 'wb') as o_file:
                pickle.dump(df_MonthData, o_file, -1)

        return

    ## launch single task
Project: kaggle    Author: RankingAI
def run(self, tasks, MonthsOfTest):
        """"""
        print('\nLoad data ...')
        start = time.time()
        ## load train
        with open('%s/1.pkl' % self._InputDir, 'rb') as i_file:
            self.TrainData = pickle.load(i_file)
        for i in range(2,MonthsOfTest[0]):
            with open('%s/%s.pkl' % (self._InputDir, i), 'rb') as i_file:
                df_tmp = pickle.load(i_file)
                self.TrainData = pd.concat([self.TrainData, df_tmp], ignore_index = True)
        ## load test
        with open('%s/%s.pkl' % (self._InputDir, MonthsOfTest[0]), 'rb') as i_file:
            self.TestData = pickle.load(i_file)
        for i in MonthsOfTest[1: ]:
            with open('%s/%s.pkl' % (self._InputDir, i), 'rb') as i_file:
                df_tmp = pickle.load(i_file)
                self.TestData = pd.concat([self.TestData, df_tmp], ignore_index = True)
        end = time.time()
        print('Load data done, time consumed %ds ...' % (end - start))

        ## tasks for l2 test
        print('\nLaunch task ...')
        start = time.time()
        for task in tasks:
            self.__LaunchTask(task, MonthsOfTest)
        end = time.time()
        if (os.path.exists(self._OutputDir) == False):
            os.makedirs(self._OutputDir)
        with open('%s/train.pkl' % self._OutputDir, 'wb') as o_file:
            pickle.dump(self.TrainData, o_file, -1)
        with open('%s/test.pkl' % self._OutputDir, 'wb') as o_file:
            pickle.dump(self.TestData, o_file, -1)
        print('All tasks done, time consumed %ds ...' % (end - start))
Project: kaggle    Author: RankingAI
def train(self):
        """"""
        print('size before truncated outliers is %d ' % len(self.TrainData))
        self.TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
        print('size after truncated outliers is %d ' % len(self.TrainData))

        X = self.TrainData.drop(self._l_drop_cols, axis=1)
        Y = self.TrainData['logerror']
        self._l_train_columns = X.columns
        FeatCols = list(self._l_train_columns)

        etr = ExtraTreesRegressor(
            n_estimators= self._iter,
            criterion= 'mse',
            max_features= int(math.sqrt(len(FeatCols))),
            max_depth = self._depth,
            n_jobs= 2,
            random_state= 2017,
            verbose= True
        )

        self._model = etr.fit(X, Y)

        ## evaluate on valid data
        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        with open(self._f_eval_train_model, 'wb') as o_file:
            pickle.dump(self._model, o_file, -1)

        self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
                                   ignore_index=True)  ## ignore_index resets the index so row indices won't overlap

        return
Project: kaggle    Author: RankingAI
def train(self):
        """"""
        start = time.time()

        print('size before truncated outliers is %d ' % len(self.TrainData))
        TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
        print('size after truncated outliers is %d ' % len(TrainData))

        TrainData['longitude'] -= -118600000
        TrainData['latitude'] -= 34220000
        #extra_tr = pd.read_hdf(path_or_buf='%s/p21/eval_train.hdf' % self.InputDir, key='train')
        #self.TrainData = pd.concat([self.TrainData, extra_tr.drop('parcelid', axis= 1)], axis = 1)

        ## use the truncated local frame, not the unfiltered self.TrainData
        X = TrainData.drop(self._l_drop_cols, axis=1)
        Y = TrainData['logerror']
        self._l_train_columns = X.columns
        X = X.values.astype(np.float32, copy=False)

        lr = LassoLars(alpha= self._lr_alpha, max_iter= self._lr_iter, verbose= True)
        self._model = lr.fit(X, Y)
        end = time.time()

        print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))

        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        #with open(self._f_eval_train_model, 'wb') as o_file:
        #    pickle.dump(self._model, o_file, -1)
        #o_file.close()

        #self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
        #                           ignore_index=True)  ## ignore_index will reset the index or index will be overlaped

        return
Project: kaggle    Author: RankingAI
def train(self):
        """"""
        start = time.time()

        extra_tr = pd.read_hdf(path_or_buf='%s/p21/eval_train.hdf' % self.InputDir, key='train')

        print('size before truncated outliers is %d ' % len(self.TrainData))
        self.TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
        #self.TrainData = self.TrainData.join(extra_tr, on='parcelid', how= 'left')
        self.TrainData = pd.concat([self.TrainData, extra_tr.drop('parcelid', axis= 1)], axis = 1)
        print('size after truncated outliers is %d ' % len(self.TrainData))

        X = self.TrainData.drop(self._l_drop_cols, axis=1)
        Y = self.TrainData['logerror']
        self._l_train_columns = X.columns
        X = X.values.astype(np.float32, copy=False)

        lr = Lasso(alpha= self._lr_alpha, max_iter= self._lr_iter, tol= 1e-4, random_state= 2017, selection= self._lr_sel)
        self._model = lr.fit(X, Y)
        end = time.time()

        print('Training iterates %d, time consumed %d ' % (self._model.n_iter_, (end - start)))

        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        with open(self._f_eval_train_model, 'wb') as o_file:
            pickle.dump(self._model, o_file, -1)

        #self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
        #                           ignore_index=True)  ## ignore_index will reset the index or index will be overlaped

        return
Project: kaggle    Author: RankingAI
def train(self):
        """"""
        start = time.time()

        print('size before truncated outliers is %d ' % len(self.TrainData))
        TrainData = self.TrainData[(self.TrainData['logerror'] > self._low) & (self.TrainData['logerror'] < self._up)]
        print('size after truncated outliers is %d ' % len(TrainData))

        X = TrainData.drop(self._l_drop_cols, axis=1)
        Y = TrainData['logerror']
        self._l_train_columns = X.columns
        X = X.values.astype(np.float32, copy=False)

        rr = Ridge(alpha= self._alpha,
                   max_iter = self._iter,
                   solver= 'svd')

        self._model = rr.fit(X, Y)
        end = time.time()

        print('time consumed %d ' % ((end - start)))

        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,
                                                            datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        with open(self._f_eval_train_model, 'wb') as o_file:
            pickle.dump(self._model, o_file, -1)

        self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]],
                                   ignore_index=True)  ## ignore_index resets the index so row indices won't overlap

        return
Project: kaggle    Author: RankingAI
def train(self):
        """"""
        print('size before truncated outliers is %d ' % len(self.TrainData))
        #TrainData = self.TrainData[(self.TrainData['logerror'] > -0.4) & (self.TrainData['logerror'] < 0.418)]
        TrainData = self.TrainData
        print('size after truncated outliers is %d ' % len(TrainData))
        print('train data size %d' % len(TrainData))

        #self.__ExtraEncode()

        X = TrainData.drop(self._l_drop_cols, axis=1)
        Y = TrainData['logerror']
        l_train_columns = X.columns

        cols = []
        for col in l_train_columns:
            for cc in self._l_cate_cols:
                if (col.startswith('%s_' % cc)):
                    cols.append(col)
                    break

        tmp_cols = set(cols)
        if(len(tmp_cols) != len(cols)):
            print('!!!! cols duplicated .')

        self._l_train_columns = list(tmp_cols)

        X = scipy.sparse.csr_matrix(X[self._l_train_columns])
        self._model = als.FMRegression(n_iter= self._iter, init_stdev=0.1, rank= self._rank, l2_reg_w= self._reg_w, l2_reg_V= self._reg_v)
        self._model.fit(X, Y)

        print('training done.')

        self._f_eval_train_model = '{0}/{1}_{2}.pkl'.format(self.OutputDir, self.__class__.__name__,datetime.now().strftime('%Y%m%d-%H:%M:%S'))
        with open(self._f_eval_train_model,'wb') as o_file:
            pickle.dump(self._model,o_file,-1)

        self.TrainData = pd.concat([self.TrainData, self.ValidData[self.TrainData.columns]], ignore_index=True)  ## ignore_index resets the index so row indices won't overlap

        return
Project: catalearn    Author: Catalearn
def wrap(self, func):

        def wrapped_func(*args):

            gpu_hash, gpu_ip, ws_port = self.connector.contact_server()

            if (gpu_hash is None or gpu_ip is None or ws_port is None):
                return

            source = get_source(func)
            params = get_local_vars(source, 4)

            uploads = {}
            uploads['function'] = func
            uploads['variables'] = args
            uploads['env'] = params

            with open('uploads.pkl', 'wb') as file:
                dill.dump(uploads, file)

            self.connector.upload_params_decorator(gpu_ip, gpu_hash)
            outUrl = self.connector.stream_output(gpu_ip, gpu_hash, ws_port)

            if outUrl is None:
                color_print('computation failed')
                return 

            result = self.connector.get_return_object(outUrl)
            return result

        return wrapped_func
Project: catalearn    Author: Catalearn
def run_in_cloud(cell, connector, namespace):

    local_vars = get_local_vars(cell, namespace)
    imports, unused_vars = find_required_imports(cell, local_vars)

    for var in unused_vars:
        del local_vars[var]

    uploads = {}
    uploads['cell'] = cell
    uploads['env'] = local_vars
    uploads['imports'] = imports

    with open('uploads.pkl', 'wb') as file:
        dill.dump(uploads, file)

    server_info = connector.contact_server()
    if (server_info is None):
        return 

    gpu_hash, gpu_ip, ws_port = server_info

    connector.upload_params_magic(gpu_ip, gpu_hash)
    outUrl = connector.stream_output(gpu_ip, gpu_hash, ws_port)

    if outUrl is None:
        return 

    result = connector.get_return_object(outUrl)
    return result
Project: combine-DT-with-NN-in-RL    Author: Burning-Bear
def save(self, path):
        """Save model to a pickle located at `path`"""
        with tempfile.TemporaryDirectory() as td:
            U.save_state(os.path.join(td, "model"))
            arc_name = os.path.join(td, "packed.zip")
            with zipfile.ZipFile(arc_name, 'w') as zipf:
                for root, dirs, files in os.walk(td):
                    for fname in files:
                        file_path = os.path.join(root, fname)
                        if file_path != arc_name:
                            zipf.write(file_path, os.path.relpath(file_path, td))
            with open(arc_name, "rb") as f:
                model_data = f.read()
        with open(path, "wb") as f:
            dill.dump((model_data, self._act_params), f)
Project: rl-attack-detection    Author: yenchenlin
def save(self, path):
        """Save model to a pickle located at `path`"""
        with tempfile.TemporaryDirectory() as td:
            U.save_state(os.path.join(td, "model"))
            arc_name = os.path.join(td, "packed.zip")
            with zipfile.ZipFile(arc_name, 'w') as zipf:
                for root, dirs, files in os.walk(td):
                    for fname in files:
                        file_path = os.path.join(root, fname)
                        if file_path != arc_name:
                            zipf.write(file_path, os.path.relpath(file_path, td))
            with open(arc_name, "rb") as f:
                model_data = f.read()
        with open(path, "wb") as f:
            dill.dump((model_data, self._act_params), f)
Project: BMSpy    Author: romeopatrick11
def Save(self, name_file):
        """
            name_file: name of the file without extension;
            the .bms extension is added by this function.
        """
        with open(name_file + '.bms', 'wb') as file:
            dill.dump(self, file)  # dill.dump returns None; no need to bind the result
Project: hakkuframework    Author: 4shadoww
def save_session(fname=None, session=None, pickleProto=4):
    import dill as pickle

    if fname is None:
        fname = conf.session
        if not fname:
            conf.session = fname = utils.get_temp_file(keep=True)
            log_interactive.info("Use [%s] as session file" % fname)
    if session is None:
        session = builtins.__dict__["scapy_session"]

    to_be_saved = session.copy()

    for k in list(to_be_saved.keys()):
        if k in ["__builtins__", "In", "Out", "conf"] or k.startswith("_") or \
                (hasattr(to_be_saved[k], "__module__") and str(to_be_saved[k].__module__).startswith('IPython')):
            del(to_be_saved[k])
            continue
        if type(to_be_saved[k]) in [type, types.ModuleType, types.MethodType]:
            log_interactive.info("[%s] (%s) can't be saved." % (k, type(to_be_saved[k])))
            del(to_be_saved[k])

    try:
        os.rename(fname, fname+".bak")
    except OSError:
        pass
    f = gzip.open(fname, "wb")
    # dump the session dict once; the original per-key loop pickled the
    # entire dict len(to_be_saved) times
    pickle.dump(to_be_saved, f, pickleProto)
    f.close()
Project: hakkuframework    Author: 4shadoww
def save_object(fname, obj):
    import dill as pickle
    pickle.dump(obj,gzip.open(fname,"wb"))
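The snippet above composes dill with gzip compression. Reading such a file back is symmetric; a minimal sketch, assuming a hypothetical load_object counterpart that is not part of the project:

import gzip

def load_object(fname):
    import dill as pickle
    return pickle.load(gzip.open(fname, "rb"))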
Project: marseille    Author: vene
def saga_score_struct_cache(*args):

    arghash = sha1(repr(("score_struct",) + args).encode('utf-8')).hexdigest()
    fn = "res/baseline_linear_{}.dill".format(arghash)

    try:
        with open(fn, 'rb') as f:
            out = dill.load(f)
        logging.info("Loaded cached version.")
    except FileNotFoundError:
        logging.info("Computing...")
        out = saga_score_struct(*args)
        with open(fn, 'wb') as f:
            dill.dump(out, f)

    return out
Project: marseille    Author: vene
def saga_decision_function(dataset, k, link_alpha, prop_alpha, l1_ratio):

    fn = cache_fname("linear_val_df", (dataset, k, link_alpha, prop_alpha,
                                       l1_ratio))

    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    ds = 'erule' if dataset == 'cdcp' else 'ukp-essays'  # sorry
    path = os.path.join("data", "process", ds, "folds", "{}", "{}")

    # sorry again: get val docs
    n_folds = 5 if dataset == 'ukp' else 3
    load, ids = get_dataset_loader(dataset, "train")
    for k_, (_, val) in enumerate(KFold(n_folds).split(ids)):
        if k_ == k:
            break
    val_docs = list(load(ids[val]))

    X_tr_link, y_tr_link = load_csr(path.format(k, 'train.npz'),
                                    return_y=True)
    X_te_link, y_te_link = load_csr(path.format(k, 'val.npz'),
                                    return_y=True)

    X_tr_prop, y_tr_prop = load_csr(path.format(k, 'prop-train.npz'),
                                    return_y=True)
    X_te_prop, y_te_prop = load_csr(path.format(k, 'prop-val.npz'),
                                    return_y=True)

    baseline = BaselineStruct(link_alpha, prop_alpha, l1_ratio)
    baseline.fit(X_tr_link, y_tr_link, X_tr_prop, y_tr_prop)

    Y_marg = baseline.decision_function(X_te_link, X_te_prop, val_docs)

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump((Y_marg, baseline), f)

    return Y_marg, baseline
Project: marseille    Author: vene
def linear_cv_score(dataset, alpha, l1_ratio, constraints):

    fn = cache_fname("linear_cv_score", (dataset, alpha, l1_ratio,
                                         constraints))
    if os.path.exists(fn):
        logging.info("Loading {}".format(fn))
        with open(fn, "rb") as f:
            return dill.load(f)

    load, ids = get_dataset_loader(dataset, split="train")
    n_folds = 5 if dataset == 'ukp' else 3

    scores = []
    for k, (tr, val) in enumerate(KFold(n_folds).split(ids)):
        Y_marg, bl = saga_decision_function(dataset, k, alpha, alpha, l1_ratio)

        val_docs = list(load(ids[val]))
        Y_true = [doc.label for doc in val_docs]
        Y_pred = bl.fast_decode(Y_marg, val_docs, constraints)

        scores.append(bl._score(Y_true, Y_pred))

    with open(fn, "wb") as f:
        logging.info("Saving {}".format(fn))
        dill.dump(scores, f)
    return scores
Project: loman    Author: janusassetallocation
def write_dill(self, file_):
        """
        Serialize a computation to a file or file-like object

        :param file_: If string, writes to a file
        :type file_: File-like object, or string
        """
        node_serialize = nx.get_node_attributes(self.dag, _AN_TAG)
        if all(serialize for name, serialize in six.iteritems(node_serialize)):
            obj = self
        else:
            obj = self.copy()
            for name, tags in six.iteritems(node_serialize):
                if _T_SERIALIZE not in tags:
                    obj._set_uninitialized(name)

        if isinstance(file_, six.string_types):
            with open(file_, 'wb') as f:
                dill.dump(obj, f)
        else:
            dill.dump(obj, file_)
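Since write_dill accepts either a path string or a file-like object, both call forms below should work; comp stands for a hypothetical loman Computation instance:

comp.write_dill('computation.dill')        # path form: the method opens the file itself
with open('computation.dill', 'wb') as f:
    comp.write_dill(f)                     # file-object form: the caller controls the handle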
Project: all2vec    Author: iheartradio
def save(self, folder):
        """Save object and return corresponding files."""
        if not os.path.exists(folder):
            os.makedirs(folder)
        files = []
        # annoy objects can't be pickled, so save these separately
        for k, v in self._annoy_objects.items():
            annoy_filepath = os.path.join(folder, '{}.ann'.format(k))
            v._ann_obj.save(annoy_filepath)
            files.append(annoy_filepath)
        pickle_filepath = os.path.join(folder, 'object.pickle')
        with open(pickle_filepath, 'wb') as handle:
            dill.dump(self, handle)
        files.append(pickle_filepath)

        # write entity types
        enttypes = self.get_entity_types()

        info_file = os.path.join(folder, 'entity_info.json')
        with open(info_file, 'w') as handle:
            json.dump(enttypes, handle)
        files.append(info_file)
        return files
Project: incubator-airflow-old    Author: apache
def _write_args(self, input_filename):
        # serialize args to file
        if self._pass_op_args():
            with open(input_filename, 'wb') as f:
                arg_dict = {'args': self.op_args, 'kwargs': self.op_kwargs}
                if self.use_dill:
                    dill.dump(arg_dict, f)
                else:
                    pickle.dump(arg_dict, f)
Project: incubator-airflow-old    Author: apache
def _generate_python_code(self):
        if self.use_dill:
            pickling_library = 'dill'
        else:
            pickling_library = 'pickle'
        fn = self.python_callable
        # don't try to read the pickle if we didn't pass anything
        if self._pass_op_args():
            load_args_line = 'with open(sys.argv[1], "rb") as f: arg_dict = {}.load(f)'.format(pickling_library)
        else:
            load_args_line = 'arg_dict = {"args": [], "kwargs": {}}'

        # no indents in original code so we can accept any type of indents in the original function
        # we deserialize args, call function, serialize result if necessary
        return dedent("""\
        import {pickling_library}
        import sys
        {load_args_code}
        args = arg_dict["args"]
        kwargs = arg_dict["kwargs"]
        with open(sys.argv[3], 'r') as f: virtualenv_string_args = list(map(lambda x: x.strip(), list(f)))
        {python_callable_lines}
        res = {python_callable_name}(*args, **kwargs)
        with open(sys.argv[2], 'wb') as f: res is not None and {pickling_library}.dump(res, f)
        """).format(
                load_args_code=load_args_line,
                python_callable_lines=dedent(inspect.getsource(fn)),
                python_callable_name=fn.__name__,
                pickling_library=pickling_library)

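The use_dill switch matters because dill can serialize argument types that the standard pickle module rejects, such as lambdas. A standalone illustration (not Airflow code):

import pickle
import dill

f = lambda x: x + 1
assert dill.loads(dill.dumps(f))(1) == 2   # dill handles the lambda
try:
    pickle.dumps(f)                        # stdlib pickle cannot
except Exception as exc:
    print('pickle failed:', exc)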
Project: dataset    Author: analysiscenter
def save(self, path):
        """ Save the model.

        Parameters
        ----------
        path : str
            a full path to a file where a model will be saved to
        """
        if self.estimator is not None:
            # pickle.dump expects an open file object, not a path string
            with open(path, 'wb') as f:
                pickle.dump(self.estimator, f)
        else:
            raise ValueError("Scikit-learn estimator does not exist. Check your config for 'estimator'.")
Project: trex-http-proxy    Author: alwye
def save_session(fname=None, session=None, pickleProto=4):
    import dill as pickle

    if fname is None:
        fname = conf.session
        if not fname:
            conf.session = fname = utils.get_temp_file(keep=True)
            log_interactive.info("Use [%s] as session file" % fname)
    if session is None:
        session = builtins.__dict__["scapy_session"]

    to_be_saved = session.copy()

    for k in list(to_be_saved.keys()):
        if k in ["__builtins__", "In", "Out", "conf"] or k.startswith("_") or \
                (hasattr(to_be_saved[k], "__module__") and str(to_be_saved[k].__module__).startswith('IPython')):
            del(to_be_saved[k])
            continue
        if type(to_be_saved[k]) in [type, types.ModuleType, types.MethodType]:
            log_interactive.info("[%s] (%s) can't be saved." % (k, type(to_be_saved[k])))
            del(to_be_saved[k])

    try:
        os.rename(fname, fname+".bak")
    except OSError:
        pass
    f = gzip.open(fname, "wb")
    # dump the session dict once; the original per-key loop pickled the
    # entire dict len(to_be_saved) times
    pickle.dump(to_be_saved, f, pickleProto)
    f.close()
Project: trex-http-proxy    Author: alwye
def save_object(fname, obj):
    import dill as pickle
    pickle.dump(obj,gzip.open(fname,"wb"))
Project: elm    Author: ContinuumIO
def main():
    '''
    Beginning on START_DATE, step forward hourly: train transformers in a
    2-layer hierarchical ensemble on the last hour's NLDAS FORA data and
    make out-of-training-sample predictions for the current hour. Makes a
    dill dump file for each hour run. Runs for NSTEPS hourly steps.
    '''
    date = START_DATE
    add_hour = datetime.timedelta(hours=1)
    get_file_name = lambda date: date.isoformat(
                        ).replace(':','_').replace('-','_') + '.dill'
    scalers = zip(('MinMaxScaler', 'RobustScaler', 'StandardScaler', 'None'),
                  (minmax, robust, standard, None))
    estimators = zip(('LinearRegression', ),
                     (linear, ))
    init_func = partial(ensemble_init_func,
                        pca=pca,
                        scalers=scalers,
                        n_components=n_components,
                        estimators=estimators,
                        preamble=preamble,
                        log=log,
                        minmax_bounds=minmax_bounds,
                        summary='Flatten, Subset, Drop NaN Rows, Get Y Data, Difference X in Time')
    for step in range(NSTEPS):
        last_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        date += add_hour
        this_hour_data = sampler(date, X_time_steps=X_TIME_STEPS)
        current_file = get_file_name(date)
        out = train_model_on_models(last_hour_data, this_hour_data, init_func)
        with open(current_file, 'wb') as f:
            dill.dump(out, f)
        print('Dumped to:', current_file)
        l2, t2, models, preds, models2, preds2 = out
        layer_1_scores = [model._score for _, model in models]
        layer_2_scores = [model._score for _, model in models2]
        print('Scores in layer 1 models:', layer_1_scores)
        print('Scores in layer 2 models:', layer_2_scores)
    return last_hour_data, this_hour_data, models, preds, models2, preds2
Project: elm    Author: ContinuumIO
def dump(self, file, protocol=None, byref=None, fmode=None, recurse=None):
        '''pickle (dill) an object to a file'''
        getattr(self, '_close', lambda: [])()
        return dill.dump(self, file, protocol=protocol,
                         byref=byref, fmode=fmode, recurse=recurse)
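The protocol, byref, fmode, and recurse arguments here are dill.dump's own keyword settings, forwarded to its Pickler. For instance, recurse=True traces the globals a function actually references instead of attempting to store the entire global dictionary. A small usage sketch (make_adder and adder.dill are illustrative names):

import dill

def make_adder(n):
    def add(x):
        return x + n
    return add

with open('adder.dill', 'wb') as f:
    # recurse=True pickles only the globals the function refers to
    dill.dump(make_adder(2), f, protocol=dill.HIGHEST_PROTOCOL, recurse=True)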
Project: elm    Author: ContinuumIO
def predict_to_pickle(prediction, fname_base):
    '''Dump a prediction y data'''
    mkdir_p(fname_base)
    fname = fname_base + '.xr'
    with open(fname, 'wb') as f:
        return dill.dump(prediction, f)
Project: adaware-nlp    Author: mhw32
def gen_brown_dataset(output_folder, num=None):
    sentences = brown.sents()

    if num:
        if num > len(sentences):
            num = len(sentences)
        sentences = sentences[:num]

    (X_train, X_test), (y_train, y_test), (K_train, K_test), param_dict = \
        gen_dataset(sentences)

    if output_folder:
        np.save(os.path.join(output_folder, 'X_train.npy'), X_train)
        np.save(os.path.join(output_folder, 'X_test.npy'), X_test)
        np.save(os.path.join(output_folder, 'y_train.npy'), y_train)
        np.save(os.path.join(output_folder, 'y_test.npy'), y_test)
        np.save(os.path.join(output_folder, 'K_train.npy'), K_train)
        np.save(os.path.join(output_folder, 'K_test.npy'), K_test)

        with open(os.path.join(output_folder, 'gen_param_dict.pkl'), 'wb') as f:
            cPickle.dump(param_dict, f)
Project: adaware-nlp    Author: mhw32
def train_brown_lemmatizer(output_folder):
    obs_set = np.load(os.path.join(output_folder, 'X_train.npy'))
    out_set = np.load(os.path.join(output_folder, 'y_train.npy'))
    count_set = np.load(os.path.join(output_folder, 'K_train.npy'))
    nn_param_set = train_lemmatizer(
        obs_set,
        out_set,
        count_set,
        window_size=[2,2],
        positive_samples_only=True,
        batch_size=128,
        param_scale=0.01,
        num_epochs=4000,
        step_size=0.001,
        l2_lambda=0.1)

    if output_folder:
        with open(os.path.join(output_folder, 'nn_param_dict.pkl'), 'wb') as f:
            dill.dump(nn_param_set, f)
Project: skip-thought-tf    Author: persiyanov
def save(textdata, fname):
        with open(fname, 'wb') as fout:
            dill.dump(textdata, fout)
Project: just    Author: kootenpv
def write(obj, fn):
    import dill
    with open(fn, "wb") as f:
        dill.dump(obj, f)
Project: pysc2-examples    Author: chris-chris
def save(self, path):
    """Save model to a pickle located at `path`"""
    with tempfile.TemporaryDirectory() as td:
      U.save_state(os.path.join(td, "model"))
      arc_name = os.path.join(td, "packed.zip")
      with zipfile.ZipFile(arc_name, 'w') as zipf:
        for root, dirs, files in os.walk(td):
          for fname in files:
            file_path = os.path.join(root, fname)
            if file_path != arc_name:
              zipf.write(file_path, os.path.relpath(file_path, td))
      with open(arc_name, "rb") as f:
        model_data = f.read()
    with open(path, "wb") as f:
      dill.dump(model_data, f)