Python itertools module: compress() example source code

We extracted the following 47 code examples from open-source Python projects to illustrate how to use itertools.compress().
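
Before the project examples, here is a minimal standalone illustration of what itertools.compress() does: it filters one iterable by a parallel iterable of selectors, yielding only the elements whose selector is truthy (the values below are illustrative, not taken from any of the projects).

from itertools import compress

data = ['a', 'b', 'c', 'd', 'e']
selectors = [1, 0, 1, 0, 1]

# Keep only the items of `data` whose corresponding selector is truthy.
print(list(compress(data, selectors)))  # ['a', 'c', 'e']

# A common pattern: build the selector list from a condition on another sequence.
scores = [90, 40, 75, 30, 88]
print(list(compress(data, [s >= 60 for s in scores])))  # ['a', 'c', 'e']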

Project: ConceptualSpaces    Author: lbechberger    | Project source | File source
def simplify(cuboids):
    """Simplifies the given set of cuboids by removing redundant ones."""

    keep = [True]*len(cuboids)
    for i in range(len(cuboids)):

        p_min = cuboids[i]._p_min
        p_max = cuboids[i]._p_max
        for j in range(len(cuboids)):
            if i == j or keep[j] == False:
                continue
            if cuboids[j].contains(p_min) and cuboids[j].contains(p_max):
                keep[i] = False
                break

    return list(compress(cuboids, keep))
Project: auto_ml    Author: doordash    | Project source | File source
def transform(self, X, y=None):

        if self.selector == 'KeepAll':
            return X

        if scipy.sparse.issparse(X):
            if X.getformat() == 'csr':
                # convert to a csc (column) matrix, rather than a csr (row) matrix
                X = X.tocsc()

            # Slice that column matrix to only get the relevant columns that we already calculated in fit:
            X = X[:, self.index_mask]

            # convert back to a csr matrix
            return X.tocsr()

        # If this is a dense matrix:
        else:
            pruned_X = [list(itertools.compress(row, self.support_mask)) for row in X]
            return pruned_X
Project: drmad    Author: bigaidream-projects    | Project source | File source
def select_subclassdata(X, y,totalClassNum,SubClassNum, subClassIndexList,normalize=True):


    X= np.array(list(itertools.compress(X, [subClassIndexList.__contains__(c) for c in y])))
    y= np.array(list(itertools.compress(y, [subClassIndexList.__contains__(c) for c in y])))


    d = {}
    for i in xrange(SubClassNum):
        d.update({subClassIndexList[i]: (totalClassNum+i)})

    d1 = {}
    for i in xrange(SubClassNum):
        d1.update({(totalClassNum+i): i})

    for k, v in d.iteritems():
        np.place(y,y==k,v)
    for k, v in d1.iteritems():
        np.place(y,y==k,v)
    return X,y
Project: Lifting-from-the-Deep-release    Author: DenisTome    | Project source | File source
def import_json(path='json/MPI_annotations.json', order='json/MPI_order.npy'):
    """Get the json file containing the dataset.
    We want the data to be shuffled; however, the training has to be repeatable.
    This means that once shuffled, the order has to be maintained."""
    with open(path) as data_file:
        data_this = json.load(data_file)
        data_this = np.array(data_this['root'])
    num_samples = len(data_this)

    if os.path.exists(order):
        idx = np.load(order)
    else:
        idx = np.random.permutation(num_samples).tolist()
        np.save(order, idx)

    is_not_validation = [not data_this[i]['isValidation']
                         for i in range(num_samples)]
    keep_data_idx = list(compress(idx, is_not_validation))

    data = data_this[keep_data_idx]
    return data, len(keep_data_idx)
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def train_step(x_batch, y_batch, epoch):
            """
            A single training step
            """
            x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_batch[0]))]
            feed_dict = {nvdm.input_x: x_batch, nvdm.x_id: x_batch_id}
            '''
            h1b = [v for v in tf.all_variables() if v.name == "h1/b:0"][0]
            h1w = [v for v in tf.all_variables() if v.name == "h1/w:0"][0]
            _, step, summaries, loss, kl, rc, p_xi_h, R, hb, hw, e  = sess.run(
                [nvdm.train_op, global_step, loss_summary, nvdm.loss, nvdm.KL, nvdm.recon_loss, nvdm.p_xi_h, nvdm.R, h1b, h1w, nvdm.e], feed_dict)
            '''
            _, step,  loss = sess.run([nvdm.train_op, nvdm.global_step, nvdm.loss], feed_dict)

            time_str = datetime.datetime.now().isoformat()
            if step % FLAGS.train_every == 0:
                print("time: {},  epoch: {}, step: {}, loss: {:g}".format(time_str,epoch, step, loss))
            if np.isnan(loss):
                import pdb
                pdb.set_trace()
            #train_summary_writer.add_summary(summaries, step)
Project: cryptoconditions    Author: bigchaindb    | Project source | File source
def from_asn1_dict(asn1_dict):
        asn1_type, value = asn1_dict.popitem()
        registered_type = TypeRegistry.find_by_asn1_type(asn1_type)
        # Instantiate condition
        condition = Condition()
        condition.type_id = registered_type['type_id']
        condition.hash = value['fingerprint']
        condition.cost = value['cost']
        condition._subtypes = set()
        if registered_type['class'].TYPE_CATEGORY == 'compound':
            subtypes = {
                TypeRegistry.find_by_type_id(type_id)['name']
                for type_id in compress(
                    range(Condition.MAX_SAFE_SUBTYPES),
                    map(lambda bit: int(bit), value['subtypes'])
                )
            }
            condition._subtypes.update(subtypes)

        return condition
Project: mglex    Author: fungs    | Project source | File source
def maximize_likelihood(self, data, responsibilities, weights, cmask=None):

        if not (cmask is None or cmask.shape == () or np.all(cmask)):  # cluster reduction
            responsibilities = responsibilities[:, cmask]
            self.names = list(compress(self.names, cmask))  # TODO: make self.names a numpy array?

        weights_combined = responsibilities * weights

        self.variables = np.dot(weights_combined.T, data.frequencies)
        with np.errstate(invalid='ignore'):  # if no training data is available for any class
            np.divide(self.variables, weights_combined.sum(axis=0, keepdims=True, dtype=types.large_float_type).T, out=self.variables)  # normalize before update, self.variables is types.prob_type

        dimchange = self.update()  # create cache for likelihood calculations

        # TODO: refactor this block
        ll = self.log_likelihood(data)
        std_per_class = common.weighted_std(ll, weights_combined)
        weight_per_class = weights_combined.sum(axis=0, dtype=types.large_float_type)
        weight_per_class /= weight_per_class.sum()
        std_per_class_mask = np.isnan(std_per_class)
        skipped_classes = std_per_class_mask.sum()
        self.stdev = np.ma.dot(np.ma.MaskedArray(std_per_class, mask=std_per_class_mask), weight_per_class)
        stderr.write("LOG %s: mean class likelihood standard deviation is %.2f (omitted %i/%i classes due to invalid or insufficient data)\n" % (self._short_name, self.stdev, skipped_classes, self.num_components - skipped_classes))
        return dimchange, ll
Project: TensorFlowHub    Author: MJFND    | Project source | File source
def generate_batch(batch_size, num_skips, skip_window):
  global data_index
  assert batch_size % num_skips == 0
  assert num_skips <= 2 * skip_window
  batch = np.ndarray(shape=(batch_size,num_skips), dtype=np.int32)
  labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
  span = 2 * skip_window + 1 # [ skip_window target skip_window ]
  buffer = collections.deque(maxlen=span)
  for _ in range(span):
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  for i in range(batch_size):
    mask = [1] * span #[1 1 1]
    mask[skip_window] = 0 # [1 0 1] 
    batch[i, :] = list(compress(buffer, mask)) # all surrounding words
    labels[i, 0] = buffer[skip_window] # the word at the center 
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  return batch, labels
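
The key step above is compress() with a mask whose centre entry is zeroed, so only the surrounding context words are kept while the centre (target) word is dropped. A stripped-down sketch of just that step, using illustrative word ids rather than the project's data:

import collections
from itertools import compress

skip_window = 1
span = 2 * skip_window + 1               # [ skip_window target skip_window ]
buffer = collections.deque([17, 42, 99], maxlen=span)

mask = [1] * span                        # [1, 1, 1]
mask[skip_window] = 0                    # [1, 0, 1] -> drop the centre (target) word
context = list(compress(buffer, mask))   # surrounding words only
target = buffer[skip_window]             # centre word

print(context, target)                   # [17, 99] 42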
Project: scikit-dataaccess    Author: MITHaystack    | Project source | File source
def perturb(self):
        ''' Perturb the list by selecting a random subset of the initial list '''
        # randomly index list elements to be kept
        index = [random.randint(0,1) for r in range(len(self.val_init))]
        # update list and keep list values where index is 1
        self.val_list = list(itertools.compress(self.val_init, index))
Project: scikit-dataaccess    Author: MITHaystack    | Project source | File source
def perturb(self):
        ''' 
        Systematically change which item is absent from the list
        '''
        self.n = self.n + 1
        if self.n >= len(self.val_init):
            self.n = 0
        index = [1 for i in range(len(self.val_init))]
        index[self.n] = 0

        self.val_list = list(itertools.compress(self.val_init, index))
Project: multiNLI_encoder    Author: easonnie    | Project source | File source
def combine_two_set(set_1, set_2, rate=(1, 1), seed=0):
    np.random.seed(seed)
    len_1 = len(set_1)
    len_2 = len(set_2)
    # print(len_1, len_2)
    p1, p2 = rate
    c_1 = np.random.choice([0, 1], len_1, p=[1 - p1, p1])
    c_2 = np.random.choice([0, 1], len_2, p=[1 - p2, p2])
    iter_1 = itertools.compress(iter(set_1), c_1)
    iter_2 = itertools.compress(iter(set_2), c_2)
    for it in itertools.chain(iter_1, iter_2):
        yield it
Project: sudokuextract    Author: hbldh    | Project source | File source
def create_mnist_dataset():
    images, labels = get_mnist_raw_data()
    mask = labels != 0
    print("Pre-zero removal:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    images = list(itertools.compress(images, mask))
    labels = labels[mask]

    images = images[3::20]
    labels = labels[3::20]

    print("Pre-blobify:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    y = y[mask]
    print("Post-blobify:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(y))]))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    try:
        os.makedirs(os.path.expanduser('~/sudokuextract'))
    except:
        pass

    try:
        for i, (img, lbl) in enumerate(zip(images, labels)):
            img = Image.fromarray(img, 'L')
            with open(os.path.expanduser('~/sudokuextract/{1}_{0:04d}.jpg'.format(i + 1, lbl)), 'wb') as f:
                img.save(f)
    except Exception as e:
        print(e)

    return images, labels, X, y
Project: catalyst    Author: enigmampc    | Project source | File source
def alive(self):
        return all(item() is not None
                   for item in compress(self._items, self._selectors))
Project: type2-fuzzy    Author: h4iku    | Project source | File source
def outlier_processing(intervals):
    """Outlier processing"""

    left = [x[0] for x in intervals]
    right = [x[1] for x in intervals]

    # Compute Q(0.25), Q(0.75) and IQR for left-ends
    lq25, lq75 = np.percentile(left, [25, 75])
    liqr = lq75 - lq25

    # Compute Q(0.25), Q(0.75) and IQR for right-ends
    rq25, rq75 = np.percentile(right, [25, 75])
    riqr = rq75 - rq25

    # Outlier processing for Left and Right bounds
    left_filtered = [x for x in intervals if (lq25 - 1.5 * liqr) <= x[0] <= (lq75 + 1.5 * liqr)]
    right_filtered = [x for x in left_filtered if (rq25 - 1.5 * riqr) <= x[1] <= (rq75 + 1.5 * riqr)]

    # Compute Q(0.25), Q(0.75) and IQR for interval length
    len_values = [x[1] - x[0] for x in right_filtered]
    lenq25, lenq75 = np.percentile(len_values, [25, 75])
    leniqr = lenq75 - lenq25

    # Outlier processing for interval length
    len_filtered = [x if (lenq25 - 1.5 * leniqr) <= x <= (lenq75 + 1.5 * leniqr) else None for x in len_values]
    selectors = [x is not None for x in len_filtered]
    filtered_intervals = list(itertools.compress(right_filtered, selectors))
    return filtered_intervals
Project: type2-fuzzy    Author: h4iku    | Project source | File source
def tolerance_limit_processing(intervals):
    """Tolerance limit processing"""

    left = [x[0] for x in intervals]
    right = [x[1] for x in intervals]
    mean_left = np.mean(left)
    std_left = np.std(left, ddof=1)
    mean_right = np.mean(right)
    std_right = np.std(right, ddof=1)

    limits = [32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967, 2.839,
        2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337, 2.31, 2.31, 2.31,
        2.31, 2.31, 2.208]
    k = limits[min(len(left) - 1, 24)]

    # Tolerance limit processing for Left and Right bounds
    left_filtered = [x for x in intervals if (mean_left - k * std_left) <= x[0] <= (mean_left + k * std_left)]
    right_filtered = [x for x in left_filtered if (mean_right - k * std_right) <= x[1] <= (mean_right + k * std_right)]

    # Tolerance limit processing for interval length
    len_values = [x[1] - x[0] for x in right_filtered]
    mean_len = np.mean(len_values)
    std_len = np.std(len_values, ddof=1)

    if std_len != 0:
        k = min(k, mean_len / std_len, (100 - mean_len) / std_len)

    len_filtered = [x if (mean_len - k * std_len) <= x <= (mean_len + k * std_len) else None for x in len_values]
    selectors = [x is not None for x in len_filtered]
    filtered_intervals = list(itertools.compress(right_filtered, selectors))
    return filtered_intervals
Project: pybotics    Author: nnadeau    | Project source | File source
def optimization_vector(self) -> np.ndarray:
        """
        Get the values of parameters being optimized.

        :return: optimization parameter values
        """
        filtered_iterator = compress(self.vector, self.optimization_mask)
        optimization_vector = np.array(list(filtered_iterator))
        return optimization_vector
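
For comparison, when the parameter vector is already a NumPy array, boolean indexing with the mask gives the same result as compress(); the values below are illustrative and not pybotics' actual parameters:

import numpy as np
from itertools import compress

vector = np.array([0.1, 0.2, 0.3, 0.4])
mask = [True, False, True, False]

via_compress = np.array(list(compress(vector, mask)))
via_indexing = vector[np.asarray(mask)]
print(np.array_equal(via_compress, via_indexing))  # True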
Project: pybotics    Author: nnadeau    | Project source | File source
def optimization_vector(self) -> np.ndarray:
        """
        Return the values of parameters being optimized.

        :return: optimization parameter values
        """
        filtered_iterator = compress(self.vector(), self.optimization_mask)
        vector = np.array(list(filtered_iterator))
        return vector
Project: Thrifty    Author: swkrueger    | Project source | File source
def filter_duplicates(detections):
    """Return detections with duplicates and unidentified detections removed,
    sorted by timestamp."""
    mask = identify_duplicates(detections)
    filtered = list(itertools.compress(detections, mask))
    filtered.sort(key=lambda x: x.timestamp)
    return filtered
Project: Thrifty    Author: swkrueger    | Project source | File source
def make_detection_extractor(detections, matches):
    rxpair_detections = collections.defaultdict(list)
    for group in matches:
        for det0_id, det1_id in itertools.combinations(group, 2):
            det0 = detections[det0_id]
            det1 = detections[det1_id]
            if det0.rxid > det1.rxid:
                det0, det1 = det1, det0
            rxpair_detections[(det0.rxid, det1.rxid)].append((det0, det1))

    timestamps = {}
    for pair, detections in rxpair_detections.iteritems():
        detections.sort(key=lambda d: d[0].timestamp)
        timestamps[pair] = [d[0].timestamp for d in detections]

    def extract(rxid0, rxid1, timestamp_start, timestamp_stop):
        assert rxid0 < rxid1
        pair = (rxid0, rxid1)
        left = bisect_left(timestamps[pair], timestamp_start)
        right = bisect_right(timestamps[pair], timestamp_stop)
        detection_pairs = rxpair_detections[pair][left:right]

        if len(detection_pairs) > 1:
            sdoa = np.array([d[0].soa - d[1].soa for d in detection_pairs])
            is_outlier = stat_tools.is_outlier(sdoa)
            detection_pairs = list(itertools.compress(detection_pairs,
                                                      ~is_outlier))

        return detection_pairs

    return extract
Project: TFG    Author: BraulioV    | Project source | File source
def split_in_pairs(split_list):
    """
    Input: ["Element1", "Element2", "Element3", "Element4"]
    Output: (["Element1", "Element3"], ["Element2", "Element4"])
    """
    def compress_elements(split_list, elements, times):
        return compress(split_list, chain.from_iterable(repeat(elements, times)))

    n_times = len(split_list) // 2
    return compress_elements(split_list, [1,0], n_times), compress_elements(split_list, [0,1], n_times)
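
Note that compress() returns lazy iterators, so the output shown in the docstring is obtained only after materialising the two returned objects, for example with list(). A small usage sketch, assuming split_in_pairs is importable as defined above:

left, right = split_in_pairs(["Element1", "Element2", "Element3", "Element4"])
print(list(left))   # ['Element1', 'Element3']
print(list(right))  # ['Element2', 'Element4']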


# separate Class names and file names in two different lists
Project: TFG    Author: BraulioV    | Project source | File source
def assign_lab_hours(self):
        for group, it in zip(self.groups.values(), range(len(self.groups.items()))):
            # get subjects and its practical hours
            subject_list = self.__get_subj_list__(group)
            shuffle(subject_list)

            subject_list = self.recalculate_subjects(subject_list, group.numsubgroups)

            # compute range of shift
            if group.shift == 'M':
                start_range, end_range = 0, self.time_table.shape[1] // 2
            else:
                start_range, end_range = self.time_table.shape[1] // 2, self.time_table.shape[1]

            # compute the index
            subjects_index = [i for i in range(group.numsubgroups)]

            days_week = self.structure.shape[2]
            # compute the total lab hours, for each subject
            hours = list(map(lambda x: x*group.numsubgroups, [subject.practical_hours if type(subject) is not tuple
                     else subject[0].practical_hours + subject[1].practical_hours
                     for subject in subject_list]))
            # start loop
            for hour in range(start_range, end_range, 2):
                for day in range(days_week):
                    # if the cell is a lab cell, let's fill it
                    if (self.structure[it, hour, day] == 'L' or self.structure[it, hour, day] == 'E')\
                            and sum(compress(hours, map(lambda x: x in subjects_index, range(len(hours))))) > 0:
                        cell1, cell2 = self.compute_best_cells(group, subject_list, subjects_index, hours, hour, day)
                        self.time_table[it, hour, day] = cell1
                        self.time_table[it, hour + 1, day] = cell2

                        subjects_index = list(map(lambda x: (x + 1) % len(subject_list), subjects_index))
                if sum(hours) == 0: break
Project: bigfishtrader    Author: xingetouzi    | Project source | File source
def can_trade(self, *codes):
        if len(codes):
            return list(compress(codes, [self.cache.client.sismember('index', code) for code in codes]))
        else:
            return list(self.cache.client.smembers('index'))
Project: ML-Predictions    Author: ltfschoen    | Project source | File source
def setup_training_columns(self):
        """ Return array of Training Columns.

        When "training_columns" array is empty it means return all columns except the "target_column"
        """

        training_columns = self.prediction_config.DATASET_LOCATION[self.dataset_choice]["training_columns"]

        if not training_columns and not isinstance(self.df_listings, type(None)):
            features = self.df_listings.columns.tolist()

            # Remove "target_column" (if already in the dataset, as may not yet have been generated by Clustering)
            if self.target_column in features:
                features.remove(self.target_column)

            # Remove columns containing Excluded full text
            for index, column_name in enumerate(self.prediction_config.EXCLUDE_TRAINING_COLUMNS_WITH_FULL_TEXT):
                if column_name in features:
                    features.remove(column_name)

            # Retain columns that do not contain Excluded partial text
            is_features_to_retain = [False] * len(features)
            for idx_outer, column_partial_name in enumerate(self.prediction_config.EXCLUDE_TRAINING_COLUMNS_WITH_PARTIAL_TEXT):
                for idx_inner, column_name in enumerate(features):
                    if column_partial_name not in column_name:
                        is_features_to_retain[idx_inner] = True
            filtered = list(compress(features, is_features_to_retain))
            return filtered
        else:
            return training_columns
Project: Mac-Python-3.X    Author: L1nwatch    | Project source | File source
def data_deal_function():
    # compress() filters the data in the first iterable using a second iterable of selectors:
    # only the items whose corresponding selector evaluates to True are yielded.
    # itertools.filterfalse() is the complement of the built-in filter(): it yields the items
    # for which the predicate returns False instead of True.
    for item in it.compress([1, 2, 3, 4, 5], [False, True, False, 0, 1]):
        print(item)

    # dropwhile() and takewhile() both consume an iterable under the control of a predicate.
    # dropwhile() discards items until the predicate first returns False, then yields everything that follows;
    # takewhile() yields items only while the predicate returns True and stops at the first False.
    def __single_digit(n):
        return n < 10

    for n in it.dropwhile(__single_digit, range(20)):
        print(n, end=" ")
    for n in it.takewhile(__single_digit, range(20)):
        print(n, end=" ")

    # accumulate() yields the running results of applying a binary function to the input (addition by default).
    # For [1, 2, 3, 4] it yields 1, 3, 6, 10. It is similar to functools.reduce(), which returns only the final value.
    for n in it.accumulate([1, 2, 3, 4, ]):
        print(n, end=" ")
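
To make the contrast drawn in the comments concrete, here is a small side-by-side sketch of compress() versus filter() and itertools.filterfalse(), with illustrative values only:

import itertools as it

values = [1, 2, 3, 4, 5]

# compress() takes a parallel iterable of selectors ...
print(list(it.compress(values, [0, 1, 0, 1, 1])))          # [2, 4, 5]

# ... while filter() / filterfalse() apply a predicate to each item.
print(list(filter(lambda n: n % 2 == 0, values)))          # [2, 4]
print(list(it.filterfalse(lambda n: n % 2 == 0, values)))  # [1, 3, 5]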
Project: eclipse2017    Author: google    | Project source | File source
def assemble(self, fnames):
        """
        Stitches together movies from an ordered list of filenames.
        Downloads new files from GCS then feeds files to ffmpeg.
        Returns list of files successfully stitched into movie & calls stats func
        """

        # Get files from GCS
        pool = Pool(min(len(fnames), constants.MOVIE_DAEMON_MAX_PROCESSES))
        results = pool.map(get_file_from_gcs, fnames)
        pool.terminate()

        # Start ffmpeg subprocess
        ffmpeg_cmd = ["ffmpeg", "-y",       # Overwrite existing movie file
                    "-f", "image2pipe",
                    "-framerate", constants.MOVIE_FRAMERATE,
                    "-vcodec","mjpeg",
                    "-i", "-",              # Input pipe from stdin
                    "-vf", "scale=1024:-1",
                    "-loglevel", "panic",
                    "-vcodec", "libx264",
                    constants.MOVIE_FPATH]

        ffmpeg_ps = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)


        fnames = list(compress(fnames, results))
        files_read = self._pipe_to_ffmpeg(ffmpeg_ps, fnames)

        if files_read > constants.MOVIE_MIN_FRAMES:
            ffmpeg_ps.stdin.close()
            ffmpeg_ps.wait()
        else:
            ffmpeg_ps.kill()

        return fnames
Project: raccoon    Author: rsheftel    | Project source | File source
def select_index(self, compare, result='boolean'):
        """
        Finds the elements in the index that match the compare parameter and returns either a list of the values that
        match, or a boolean list the length of the index with True for each index that matches. If the indexes are
        tuples then the compare is a tuple where None in any field of the tuple will be treated as "*" and match all
        values.

        :param compare: value to compare as a singleton or tuple
        :param result: 'boolean' = returns a list of booleans, 'value' = returns a list of index values that match
        :return: list of booleans or values
        """
        if isinstance(compare, tuple):
            # this crazy list comprehension will match all the tuples in the list with None being an * wildcard
            booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)])
                        for x, v in enumerate(self._index)]
        else:
            booleans = [False] * len(self._index)
            if self._sort:
                booleans[sorted_index(self._index, compare)] = True
            else:
                booleans[self._index.index(compare)] = True
        if result == 'boolean':
            return booleans
        elif result == 'value':
            return list(compress(self._index, booleans))
        else:
            raise ValueError('only valid values for result parameter are: boolean or value.')
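
The wildcard matching above is plain Python and can be sketched independently of raccoon's DataFrame; the simplified version below uses illustrative index values and the same idea of treating None as a "*" wildcard:

from itertools import compress

index = [('a', 1), ('a', 2), ('b', 1)]
compare = ('a', None)                        # None matches any value in that position

booleans = [all(c == w for c, w in zip(compare, v) if c is not None)
            for v in index]
print(booleans)                              # [True, True, False]
print(list(compress(index, booleans)))       # [('a', 1), ('a', 2)]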
Project: raccoon    Author: rsheftel    | Project source | File source
def get_rows(self, indexes, column, as_list=False):
        """
        For a list of indexes and a single column name return the values of the indexes in that column.

        :param indexes: either a list of index values or a list of booleans with same length as all indexes
        :param column: single column name
        :param as_list: if True return a list, if False return DataFrame
        :return: DataFrame if as_list is False, a list if as_list is True
        """
        c = self._columns.index(column)
        if all([isinstance(i, bool) for i in indexes]):  # boolean list
            if len(indexes) != len(self._index):
                raise ValueError('boolean index list must be same size of existing index')
            if all(indexes):  # the entire column
                data = self._data[c]
                index = self._index
            else:
                data = list(compress(self._data[c], indexes))
                index = list(compress(self._index, indexes))
        else:  # index values list
            locations = [sorted_index(self._index, x) for x in indexes] if self._sort \
                else [self._index.index(x) for x in indexes]
            data = [self._data[c][i] for i in locations]
            index = [self._index[i] for i in locations]
        return data if as_list else DataFrame(data={column: data}, index=index, index_name=self._index_name,
                                              sort=self._sort)
Project: raccoon    Author: rsheftel    | Project source | File source
def get_matrix(self, indexes, columns):
        """
        For a list of indexes and list of columns return a DataFrame of the values.

        :param indexes: either a list of index values or a list of booleans with same length as all indexes
        :param columns: list of column names
        :return: DataFrame
        """
        if all([isinstance(i, bool) for i in indexes]):  # boolean list
            is_bool_indexes = True
            if len(indexes) != len(self._index):
                raise ValueError('boolean index list must be same size of existing index')
            bool_indexes = indexes
            indexes = list(compress(self._index, indexes))
        else:
            is_bool_indexes = False
            locations = [sorted_index(self._index, x) for x in indexes] if self._sort \
                else [self._index.index(x) for x in indexes]

        if all([isinstance(i, bool) for i in columns]):  # boolean list
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))

        col_locations = [self._columns.index(x) for x in columns]
        data_dict = dict()

        for c in col_locations:
            data_dict[self._columns[c]] = list(compress(self._data[c], bool_indexes)) if is_bool_indexes \
                else [self._data[c][i] for i in locations]

        return DataFrame(data=data_dict, index=indexes, columns=columns, index_name=self._index_name,
                         sort=self._sort)
Project: raccoon    Author: rsheftel    | Project source | File source
def get_location(self, location, columns=None, as_dict=False, index=True):
        """
        For an index location and list of columns return a DataFrame of the values. This is optimized for speed because
        it does not need to look up the index location with a search. It also accepts relative indexing from the end of
        the DataFrame in standard python notation [-3, -2, -1]

        :param location: index location in standard python form of positive or negative number
        :param columns: list of columns, or None to include all columns
        :param as_dict: if True then return a dictionary
        :param index: if True then include the index in the dictionary if as_dict=True
        :return: DataFrame or dictionary
        """
        if columns is None:
            columns = self._columns
        elif all([isinstance(i, bool) for i in columns]):
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))
        data = dict()
        for column in columns:
            c = self._columns.index(column)
            data[column] = self._data[c][location]
        index_value = self._index[location]
        if as_dict:
            if index:
                data[self._index_name] = index_value
            return data
        else:
            data = {k: [data[k]] for k in data}  # this makes the dict items lists
            return DataFrame(data=data, index=[index_value], columns=columns, index_name=self._index_name,
                             sort=self._sort)
Project: raccoon    Author: rsheftel    | Project source | File source
def get_slice(self, start_index=None, stop_index=None, columns=None, as_dict=False):
        """
        For sorted DataFrames will return either a DataFrame or dict of all of the rows where the index is greater than
        or equal to the start_index if provided and less than or equal to the stop_index if provided. If either the
        start or stop index is None then it will include from the first or last element, similar to a standard python
        slice of [:5] or [5:]. Both end points are considered inclusive.

        :param start_index: lowest index value to include, or None to start from the first row
        :param stop_index: highest index value to include, or None to end at the last row
        :param columns: list of column names to include, or None for all columns
        :param as_dict: if True then return a tuple of (list of index, dict of column names: list data values)
        :return: DataFrame or tuple
        """
        if not self._sort:
            raise RuntimeError('Can only use get_slice on sorted DataFrames')

        if columns is None:
            columns = self._columns
        elif all([isinstance(i, bool) for i in columns]):
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))

        start_location = bisect_left(self._index, start_index) if start_index is not None else None
        stop_location = bisect_right(self._index, stop_index) if stop_index is not None else None

        index = self._index[start_location:stop_location]
        data = dict()
        for column in columns:
            c = self._columns.index(column)
            data[column] = self._data[c][start_location:stop_location]

        if as_dict:
            return index, data
        else:
            data = data if data else None  # if the dict is empty, convert to None
            return DataFrame(data=data, index=index, columns=columns, index_name=self._index_name, sort=self._sort,
                             use_blist=self._blist)
Project: raccoon    Author: rsheftel    | Project source | File source
def select_index(self, compare, result='boolean'):
        """
        Finds the elements in the index that match the compare parameter and returns either a list of the values that
        match, or a boolean list the length of the index with True for each index that matches. If the indexes are
        tuples then the compare is a tuple where None in any field of the tuple will be treated as "*" and match all
        values.

        :param compare: value to compare as a singleton or tuple
        :param result: 'boolean' = returns a list of booleans, 'value' = returns a list of index values that match
        :return: list of booleans or values
        """
        if isinstance(compare, tuple):
            # this crazy list comprehension will match all the tuples in the list with None being an * wildcard
            booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)])
                        for x, v in enumerate(self._index)]
        else:
            booleans = [False] * len(self._index)
            if self._sort:
                booleans[sorted_index(self._index, compare)] = True
            else:
                booleans[self._index.index(compare)] = True
        if result == 'boolean':
            return booleans
        elif result == 'value':
            return list(compress(self._index, booleans))
        else:
            raise ValueError('only valid values for result parameter are: boolean or value.')
Project: Modern-Python-Cookbook    Author: PacktPublishing    | Project source | File source
def pass_outliers(data):
    return itertools.compress(data, (z >= 3.5 for z in z_mod(data)))
Project: Modern-Python-Cookbook    Author: PacktPublishing    | Project source | File source
def reject_outliers(data):
    return itertools.compress(data, (z < 3.5 for z in z_mod(data)))
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def prediction(x_sample, y_sample): # sample has size 20
            '''
            Get the perplexity of the test set
            '''

            perplist = []
            for i in range(20):
                x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_sample[0]))]
                feed_dict = {nvdm.input_x: x_sample[i].reshape(1,10000)}
                step, p_xi_h = sess.run([nvdm.global_step, nvdm.p_xi_h], feed_dict)

                valid_p = np.mean(np.log(p_xi_h[x_batch_id]))
                perplist.append(valid_p)
            print("perplexity: {}".format(np.exp(-np.mean(perplist))))
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def train_step(x_batch, y_batch, epoch,predicts,labels):
            """
            A single training step
            """
            y_batch = y_batch.reshape(1,-1)
            x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_batch[0]))]
            feed_dict = {nvdm.input_x: x_batch,
                         nvdm.input_y:y_batch,
                         nvdm.x_id: x_batch_id}
            '''
            h1b = [v for v in tf.all_variables() if v.name == "h1/b:0"][0]
            h1w = [v for v in tf.all_variables() if v.name == "h1/w:0"][0]
            _, step, summaries, loss, kl, rc, p_xi_h, R, hb, hw, e  = sess.run(
                [nvdm.train_op, global_step, loss_summary, nvdm.loss, nvdm.KL, nvdm.recon_loss, nvdm.p_xi_h, nvdm.R, h1b, h1w, nvdm.e], feed_dict)
            '''

            _, step,  loss,predict = sess.run([nvdm.train_op, nvdm.global_step, nvdm.loss,nvdm.predicts], feed_dict)


            time_str = datetime.datetime.now().isoformat()
            if step % FLAGS.train_every == 0:
                import pdb
                pdb.set_trace()


                score = f1_score_multiclass(np.array(predicts),np.array(labels))
                print("time: {},  epoch: {}, step: {}, loss: {:g}, score: {:g}".format(time_str,epoch, step, loss,score))

                return [],[]


            predicts.append(predict)
            labels.append(y_batch[0].astype(int))

            if np.isnan(loss):
                import pdb
                pdb.set_trace()

            #train_summary_writer.add_summary(summaries, step)

            return predicts, labels
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def prediction(x_sample, y_sample): # sample has size 20
            '''
            Get the perplexity of the test set
            '''
            perplist = []
            for i in range(20):
                x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_sample[0]))]
                feed_dict = {nvdm.input_x: x_sample[i].reshape(1,10000),
                             nvdm.input_y: y_sample[i].reshape(1,103)}
                step, p_xi_h = sess.run([nvdm.global_step, nvdm.p_xi_h], feed_dict)

                valid_p = np.mean(np.log(p_xi_h[x_batch_id]))
                perplist.append(valid_p)
            print("perplexity: {}".format(np.exp(-np.mean(perplist))))
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def train(self, X_train, y_train):
        #self.saver.restore(self.sess, "./imdbmodel/model.ckpt")
        total_batch = X_train.shape[0] // self.batch_size 
        for e in range(self.epoch):
            perplist = []
            for i in range(total_batch):
                X_batch = X_train[i*self.batch_size:(i+1)*self.batch_size]
                y_batch = y_train[i*self.batch_size:(i+1)*self.batch_size]
                x_batch_id = [_ for _ in itertools.compress(range(self.feature_size), map(lambda x : x>0, X_batch[0].toarray()[0]))]
                feed_dict = {
                        self.input_x : X_batch.toarray(),
                        self.input_y : np.reshape(y_batch, [-1,1]),
                        self.x_id : x_batch_id
                        }
                _, loss =  self.sess.run([
                            self.train_op, 
                            self.loss], feed_dict)
                if np.isnan(loss):
                    import pdb
                    pdb.set_trace()
                if i % self.display_score == 0:
                    p_xi_h = self.sess.run([self.p_xi_h], feed_dict)
                    valid_p = np.mean(np.log(p_xi_h[0][x_batch_id]))
                    perplist.append(valid_p)
                    print("step: {}, perp: {:f}".format(i, np.exp(-np.mean(perplist))))
            # save model every epoch
                if i > 0 and i % 2000 == 0:
                    self.savemodel()
Project: MetaHeuristic    Author: gonzalesMK    | Project source | File source
def _evaluate(self, individual, X, y, cv=3):
        """ Evaluate method

        Parameters
        ----------
        individual: list [n_features]
                The input individual to be evaluated

        Return
        ----------
        Score of the individual : tuple(cross_val_score, feature score)
        """
        # Select Features
        features = list(compress(range(len(individual)), individual))
        train = np.reshape([X[:, i] for i in features],
                           [len(features), len(X)]).T

        if train.shape[1] == 0:
            return 0,1,

        # Applying K-Fold Cross Validation
        accuracies = cross_val_score(estimator=clone(self.estimator), X=train, 
                                     y=y, cv=cv, 
                                     scoring=self.cv_metric_function)

        if self.features_metric_function is None:
            feature_score = pow(sum(individual)/(len(individual)*5), 2)
        else:
            feature_score = self.features_metric_function(individual)

        return accuracies.mean() - accuracies.std(), feature_score
Project: MetaHeuristic    Author: gonzalesMK    | Project source | File source
def _evaluate(self, individual, X, y, cv=3):
        """ Evaluate method

        Parameters
        ----------
        individual: list [n_features]
                The input individual to be evaluated

        Return
        ----------
        Score of the individual : tuple(cross_val_score, feature score)
        """
        # Select Features
        features = list(compress(range(len(individual)), individual))
        train = np.reshape([X[:, i] for i in features],
                           [len(features), len(X)]).T

        if train.shape[1] == 0:
            return 0,1,

        # Applying K-Fold Cross Validation
        accuracies = cross_val_score(estimator=clone(self.estimator), X=train, 
                                     y=y, cv=cv, 
                                     scoring=self.cv_metric_function)

        if self.features_metric_function == "log" :
            feature_score = np.log10(9*(sum(individual)/len(individual))+1) 
        elif self.features_metric_function == "poly" :
            feature_score = sum(individual)/len(individual)
        else:
            raise ValueError('Unknown evaluation')

        return accuracies.mean() - accuracies.std(), feature_score
Project: tensorflow-playground    Author: wangz10    | Project source | File source
def generate_batch_pvdm(doc_ids, word_ids, batch_size, window_size):
    '''
    Batch generator for PV-DM (Distributed Memory Model of Paragraph Vectors).
    batch should be a shape of (batch_size, window_size+1)

    Parameters
    ----------
    doc_ids: list of document indices 
    word_ids: list of word indices
    batch_size: number of words in each mini-batch
    window_size: number of leading words before the target word 
    '''
    global data_index
    assert batch_size % window_size == 0
    batch = np.ndarray(shape=(batch_size, window_size + 1), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = window_size + 1
    buffer = collections.deque(maxlen=span) # used for collecting word_ids[data_index] in the sliding window
    buffer_doc = collections.deque(maxlen=span) # collecting id of documents in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    mask = [1] * span
    mask[-1] = 0 
    i = 0
    while i < batch_size:
        if len(set(buffer_doc)) == 1:
            doc_id = buffer_doc[-1]
            # all leading words and the doc_id
            batch[i, :] = list(compress(buffer, mask)) + [doc_id]
            labels[i, 0] = buffer[-1] # the last word at end of the sliding window
            i += 1
        # move the sliding window  
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    return batch, labels
Project: tensorflow-playground    Author: wangz10    | Project source | File source
def generate_batch_cbow(data, batch_size, num_skips, skip_window):
    '''
    Batch generator for CBOW (Continuous Bag of Words).
    batch should be a shape of (batch_size, num_skips)

    Parameters
    ----------
    data: list of index of words
    batch_size: number of words in each mini-batch
    num_skips: number of surrounding words in both directions (2: one word ahead and one word following)
    skip_window: number of words at both ends of a sentence to skip (1: skip the first and last word of a sentence)
    '''
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size, num_skips), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = 2 * skip_window + 1 # [ skip_window target skip_window ]
    buffer = collections.deque(maxlen=span) # used for collecting data[data_index] in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    # move the sliding window  
    for i in range(batch_size):
        mask = [1] * span
        mask[skip_window] = 0 
        batch[i, :] = list(compress(buffer, mask)) # all surrounding words
        labels[i, 0] = buffer[skip_window] # the word at the center 
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    return batch, labels
Project: faampy    Author: ncasuk    | Project source | File source
def simplified(self):
        """
        Returns the reduced list of coordinates
        """
        if not self.Simple_mask:
            self._simplify_()
        return list(itertools.compress(self, self.Simple_mask))
Project: asynq    Author: quora    | Project source | File source
def afilter(function, sequence):
    """Equivalent of filter() that takes an async filter function.

    Returns a list.

    """
    if function is None:
        result(filter(None, sequence)); return
    should_include = yield [function.asynq(elt) for elt in sequence]
    result(list(itertools.compress(sequence, should_include))); return
Project: asynq    Author: quora    | Project source | File source
def afilterfalse(function, sequence):
    """Equivalent of itertools.ifilterfalse() that takes an async filter function.

    Returns a list.

    """
    should_exclude = yield [function.asynq(elt) for elt in sequence]
    should_include = [not res for res in should_exclude]
    result(list(itertools.compress(sequence, should_include))); return
Project: open-database    Author: mitaffinity    | Project source | File source
def retrieve(self, table, cols, col_rules):
        """ Retrieves column values from a single table based on a given filtering rule.

        Example:
        <pre lang="python">
        my_db.retrieve(some_table_table,["num1","num2"],{"remainder_div_3":"{}==1 or {}==2", "sum":"{}<200"})
        </pre>
        will retrieve:
        <pre lang="python">
        columns "num1" and "num2" from the table, for rows whose "remainder_div_3" column is 1 or 2 and whose
        "sum" column is less than 200. All column rules are combined with an "AND" statement.
        </pre>

        :param table: string (name of the table to retrieve from)
        :param cols: list of strings (names of the columns to retrieve)
        :param col_rules: dictionary of rules that will be evaluated
        :return: 
        Nested list in which each entry is a row of the requested column values that passed the filter rules
        """
        # todo: add string comp support
        cursor = self.conn.cursor()

        # from the table get all the columns to retrieve
        sql_cmd = "select " + " ,".join(cols) + " from \"" + table + "\""
        cursor.execute(sql_cmd)
        sel_sets = cursor.fetchall()

        if len(col_rules)==0:
            sel_vals = sel_sets
        else:
            # from the table select all the columns to filter for
            sql_cmd = "select " + ", ".join([key for key in col_rules]) + " from \"" + table + "\""
            cursor.execute(sql_cmd)
            filter_sets = cursor.fetchall()

            # repeat every argument number of times it appears in the selection
            mult = [len(re.findall("{}", col_rules[key])) for key in col_rules]

            def _repeat_vals(vals, repeats):
                rep_vals = []
                [[rep_vals.append(vals[i]) for _ in range(repeats[i])] for i in range(len(col_rules))]
                return rep_vals
            filter_sets = [_repeat_vals(set, mult) for set in filter_sets]

            # evaluate every row to get a boolean mask of examples
            rule_tmp = "(" + ") and (".join([col_rules[key] for key in col_rules]) + ")"
            sel_mask = [eval(rule_tmp.format(*val_set)) for val_set in filter_sets]

            # apply a boolean mask to take only entries that fit the selection rule
            sel_sets = list(compress(sel_sets, sel_mask))
            sel_vals = sel_sets
            #sel_vals = [list(x) for x in zip(*sel_sets)]
        return sel_vals
Project: pyshtrih    Author: oleg-golovanov    | Project source | File source
def handle_fr_flags(arg):
    def get_keys(revision):
        return (
            (u'??????????? ???????? ??????????', u'????? ???????? ??????')[revision],
            u'???? ????? ?????????',
            (u'????? ?????? ??????? ????????', u'?????? ?? ?????? ?? ??????????')[revision],
            (u'????? ??????? ??????? ????????', u'?????? ?? ????? ? ?????????', u'?????? ????????')[revision],
            u'???????? ????',
            u'?????? ??????? ??',
            u'????? ???????????? ??????? ?????',
            u'????? ???????????? ??????????? ?????',
            u'?????????? ?????? ??????? ?????',
            u'?????????? ?????? ????????????? ???????',
            u'????',
            u'????????? ?????????? ?????',
            u'?????? ?????? ??????????? ?????????',
            u'??????? ?????? ??????????? ?????????',
            u'????? ??????? ?????',
            u'????? ????????????? ???????'
        )

    bits = misc.int_to_bits(arg, 16)

    a, b, c = 0, 1, 2
    flags_actual = {
        # ?????-??-?
        4: ((0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1), a),
        # ?????-?????-??-?
        9: ((0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0), a),
        # ?????-?????-??-? (?????? 02)
        12: ((0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0), a)
    }

    flags, rev = flags_actual.get(
        handle_fr_flags.model,
        ((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), a)
    )

    return dict(
        zip(
            itertools.compress(get_keys(rev), flags),
            itertools.compress(bits, flags)
        )
    )
Project: tensorflow-playground    Author: wangz10    | Project source | File source
def generate_batch_pvdm(batch_size, window_size):
    '''
    Batch generator for PV-DM (Distributed Memory Model of Paragraph Vectors).
    batch should be a shape of (batch_size, window_size+1)

    Parameters
    ----------
    batch_size: number of words in each mini-batch
    window_size: number of leading words before the target word
    '''
    global data_index
    assert batch_size % window_size == 0
    batch = np.ndarray(shape=(batch_size, window_size + 1), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = window_size + 1
    buffer = collections.deque(maxlen=span) # used for collecting word_ids[data_index] in the sliding window
    buffer_doc = collections.deque(maxlen=span) # collecting id of documents in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    mask = [1] * span
    mask[-1] = 0 
    i = 0
    while i < batch_size:
        if len(set(buffer_doc)) == 1:
            doc_id = buffer_doc[-1]
            # all leading words and the doc_id
            batch[i, :] = list(compress(buffer, mask)) + [doc_id]
            labels[i, 0] = buffer[-1] # the last word at end of the sliding window
            i += 1
            # print buffer
            # print list(compress(buffer, mask))
        # move the sliding window  
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    return batch, labels

## examining the batch generator function