Python itertools module: compress() example source code

We extracted the following 47 code examples from open-source Python projects to illustrate how to use itertools.compress().
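
Before the project examples, here is a minimal standalone illustration of what itertools.compress() does: it filters one iterable by a parallel iterable of selectors, yielding only the elements whose selector is truthy (the values below are illustrative, not taken from any of the projects).

from itertools import compress

data = ['a', 'b', 'c', 'd', 'e']
selectors = [1, 0, 1, 0, 1]

# Keep only the items of `data` whose corresponding selector is truthy.
print(list(compress(data, selectors)))  # ['a', 'c', 'e']

# A common pattern: build the selector list from a condition on another sequence.
scores = [90, 40, 75, 30, 88]
print(list(compress(data, [s >= 60 for s in scores])))  # ['a', 'c', 'e']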

Project: ConceptualSpaces    Author: lbechberger    | Project source | File source
def simplify(cuboids):
    """Simplifies the given set of cuboids by removing redundant ones."""

    keep = [True]*len(cuboids)
    for i in range(len(cuboids)):

        p_min = cuboids[i]._p_min
        p_max = cuboids[i]._p_max
        for j in range(len(cuboids)):
            if i == j or keep[j] == False:
                continue
            if cuboids[j].contains(p_min) and cuboids[j].contains(p_max):
                keep[i] = False
                break

    return list(compress(cuboids, keep))
Project: auto_ml    Author: doordash    | Project source | File source
def transform(self, X, y=None):

        if self.selector == 'KeepAll':
            return X

        if scipy.sparse.issparse(X):
            if X.getformat() == 'csr':
                # convert to a csc (column) matrix, rather than a csr (row) matrix
                X = X.tocsc()

            # Slice that column matrix to only get the relevant columns that we already calculated in fit:
            X = X[:, self.index_mask]

            # convert back to a csr matrix
            return X.tocsr()

        # If this is a dense matrix:
        else:
            pruned_X = [list(itertools.compress(row, self.support_mask)) for row in X]
            return pruned_X
Project: drmad    Author: bigaidream-projects    | Project source | File source
def select_subclassdata(X, y,totalClassNum,SubClassNum, subClassIndexList,normalize=True):


    X= np.array(list(itertools.compress(X, [subClassIndexList.__contains__(c) for c in y])))
    y= np.array(list(itertools.compress(y, [subClassIndexList.__contains__(c) for c in y])))


    d = {}
    for i in xrange(SubClassNum):
        d.update({subClassIndexList[i]: (totalClassNum+i)})

    d1 = {}
    for i in xrange(SubClassNum):
        d1.update({(totalClassNum+i): i})

    for k, v in d.iteritems():
        np.place(y,y==k,v)
    for k, v in d1.iteritems():
        np.place(y,y==k,v)
    return X,y
Project: Lifting-from-the-Deep-release    Author: DenisTome    | Project source | File source
def import_json(path='json/MPI_annotations.json', order='json/MPI_order.npy'):
    """Get the json file containing the dataset.
    We want the data to be shuffled; however, the training has to be repeatable.
    This means that once shuffled, the order has to be maintained."""
    with open(path) as data_file:
        data_this = json.load(data_file)
        data_this = np.array(data_this['root'])
    num_samples = len(data_this)

    if os.path.exists(order):
        idx = np.load(order)
    else:
        idx = np.random.permutation(num_samples).tolist()
        np.save(order, idx)

    is_not_validation = [not data_this[i]['isValidation']
                         for i in range(num_samples)]
    keep_data_idx = list(compress(idx, is_not_validation))

    data = data_this[keep_data_idx]
    return data, len(keep_data_idx)
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def train_step(x_batch, y_batch, epoch):
            """
            A single training step
            """
            x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_batch[0]))]
            feed_dict = {nvdm.input_x: x_batch, nvdm.x_id: x_batch_id}
            '''
            h1b = [v for v in tf.all_variables() if v.name == "h1/b:0"][0]
            h1w = [v for v in tf.all_variables() if v.name == "h1/w:0"][0]
            _, step, summaries, loss, kl, rc, p_xi_h, R, hb, hw, e  = sess.run(
                [nvdm.train_op, global_step, loss_summary, nvdm.loss, nvdm.KL, nvdm.recon_loss, nvdm.p_xi_h, nvdm.R, h1b, h1w, nvdm.e], feed_dict)
            '''
            _, step,  loss = sess.run([nvdm.train_op, nvdm.global_step, nvdm.loss], feed_dict)

            time_str = datetime.datetime.now().isoformat()
            if step % FLAGS.train_every == 0:
                print("time: {},  epoch: {}, step: {}, loss: {:g}".format(time_str,epoch, step, loss))
            if np.isnan(loss):
                import pdb
                pdb.set_trace()
            #train_summary_writer.add_summary(summaries, step)
Project: cryptoconditions    Author: bigchaindb    | Project source | File source
def from_asn1_dict(asn1_dict):
        asn1_type, value = asn1_dict.popitem()
        registered_type = TypeRegistry.find_by_asn1_type(asn1_type)
        # Instantiate condition
        condition = Condition()
        condition.type_id = registered_type['type_id']
        condition.hash = value['fingerprint']
        condition.cost = value['cost']
        condition._subtypes = set()
        if registered_type['class'].TYPE_CATEGORY == 'compound':
            subtypes = {
                TypeRegistry.find_by_type_id(type_id)['name']
                for type_id in compress(
                    range(Condition.MAX_SAFE_SUBTYPES),
                    map(lambda bit: int(bit), value['subtypes'])
                )
            }
            condition._subtypes.update(subtypes)

        return condition
Project: mglex    Author: fungs    | Project source | File source
def maximize_likelihood(self, data, responsibilities, weights, cmask=None):

        if not (cmask is None or cmask.shape == () or np.all(cmask)):  # cluster reduction
            responsibilities = responsibilities[:, cmask]
            self.names = list(compress(self.names, cmask))  # TODO: make self.names a numpy array?

        weights_combined = responsibilities * weights

        self.variables = np.dot(weights_combined.T, data.frequencies)
        with np.errstate(invalid='ignore'):  # if no training data is available for any class
            np.divide(self.variables, weights_combined.sum(axis=0, keepdims=True, dtype=types.large_float_type).T, out=self.variables)  # normalize before update, self.variables is types.prob_type

        dimchange = self.update()  # create cache for likelihood calculations

        # TODO: refactor this block
        ll = self.log_likelihood(data)
        std_per_class = common.weighted_std(ll, weights_combined)
        weight_per_class = weights_combined.sum(axis=0, dtype=types.large_float_type)
        weight_per_class /= weight_per_class.sum()
        std_per_class_mask = np.isnan(std_per_class)
        skipped_classes = std_per_class_mask.sum()
        self.stdev = np.ma.dot(np.ma.MaskedArray(std_per_class, mask=std_per_class_mask), weight_per_class)
        stderr.write("LOG %s: mean class likelihood standard deviation is %.2f (omitted %i/%i classes due to invalid or insufficient data)\n" % (self._short_name, self.stdev, skipped_classes, self.num_components - skipped_classes))
        return dimchange, ll
Project: TensorFlowHub    Author: MJFND    | Project source | File source
def generate_batch(batch_size, num_skips, skip_window):
  global data_index
  assert batch_size % num_skips == 0
  assert num_skips <= 2 * skip_window
  batch = np.ndarray(shape=(batch_size,num_skips), dtype=np.int32)
  labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
  span = 2 * skip_window + 1 # [ skip_window target skip_window ]
  buffer = collections.deque(maxlen=span)
  for _ in range(span):
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  for i in range(batch_size):
    mask = [1] * span #[1 1 1]
    mask[skip_window] = 0 # [1 0 1] 
    batch[i, :] = list(compress(buffer, mask)) # all surrounding words
    labels[i, 0] = buffer[skip_window] # the word at the center 
    buffer.append(data[data_index])
    data_index = (data_index + 1) % len(data)
  return batch, labels
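
The key step above is compress() with a mask whose centre entry is zeroed, so only the surrounding context words are kept while the centre (target) word is dropped. A stripped-down sketch of just that step, using illustrative word ids rather than the project's data:

import collections
from itertools import compress

skip_window = 1
span = 2 * skip_window + 1               # [ skip_window target skip_window ]
buffer = collections.deque([17, 42, 99], maxlen=span)

mask = [1] * span                        # [1, 1, 1]
mask[skip_window] = 0                    # [1, 0, 1] -> drop the centre (target) word
context = list(compress(buffer, mask))   # surrounding words only
target = buffer[skip_window]             # centre word

print(context, target)                   # [17, 99] 42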
Project: scikit-dataaccess    Author: MITHaystack    | Project source | File source
def perturb(self):
        ''' Perturb the list by selecting a random subset of the initial list '''
        # randomly index list elements to be kept
        index = [random.randint(0,1) for r in range(len(self.val_init))]
        # update list and keep list values where index is 1
        self.val_list = list(itertools.compress(self.val_init, index))
Project: scikit-dataaccess    Author: MITHaystack    | Project source | File source
def perturb(self):
        ''' 
        Systematically change which item is absent from the list
        '''
        self.n = self.n + 1
        if self.n >= len(self.val_init):
            self.n = 0
        index = [1 for i in range(len(self.val_init))]
        index[self.n] = 0

        self.val_list = list(itertools.compress(self.val_init, index))
Project: multiNLI_encoder    Author: easonnie    | Project source | File source
def combine_two_set(set_1, set_2, rate=(1, 1), seed=0):
    np.random.seed(seed)
    len_1 = len(set_1)
    len_2 = len(set_2)
    # print(len_1, len_2)
    p1, p2 = rate
    c_1 = np.random.choice([0, 1], len_1, p=[1 - p1, p1])
    c_2 = np.random.choice([0, 1], len_2, p=[1 - p2, p2])
    iter_1 = itertools.compress(iter(set_1), c_1)
    iter_2 = itertools.compress(iter(set_2), c_2)
    for it in itertools.chain(iter_1, iter_2):
        yield it
Project: sudokuextract    Author: hbldh    | Project source | File source
def create_mnist_dataset():
    images, labels = get_mnist_raw_data()
    mask = labels != 0
    print("Pre-zero removal:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    images = list(itertools.compress(images, mask))
    labels = labels[mask]

    images = images[3::20]
    labels = labels[3::20]

    print("Pre-blobify:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(labels))]))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    y = y[mask]
    print("Post-blobify:  Label / N : {0}".format([(v, c) for v, c in zip(_range(10), np.bincount(y))]))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    try:
        os.makedirs(os.path.expanduser('~/sudokuextract'))
    except:
        pass

    try:
        for i, (img, lbl) in enumerate(zip(images, labels)):
            img = Image.fromarray(img, 'L')
            with open(os.path.expanduser('~/sudokuextract/{1}_{0:04d}.jpg'.format(i + 1, lbl)), 'wb') as f:
                img.save(f)
    except Exception as e:
        print(e)

    return images, labels, X, y
Project: catalyst    Author: enigmampc    | Project source | File source
def alive(self):
        return all(item() is not None
                   for item in compress(self._items, self._selectors))
Project: type2-fuzzy    Author: h4iku    | Project source | File source
def outlier_processing(intervals):
    """Outlier processing"""

    left = [x[0] for x in intervals]
    right = [x[1] for x in intervals]

    # Compute Q(0.25), Q(0.75) and IQR for left-ends
    lq25, lq75 = np.percentile(left, [25, 75])
    liqr = lq75 - lq25

    # Compute Q(0.25), Q(0.75) and IQR for right-ends
    rq25, rq75 = np.percentile(right, [25, 75])
    riqr = rq75 - rq25

    # Outlier processing for Left and Right bounds
    left_filtered = [x for x in intervals if (lq25 - 1.5 * liqr) <= x[0] <= (lq75 + 1.5 * liqr)]
    right_filtered = [x for x in left_filtered if (rq25 - 1.5 * riqr) <= x[1] <= (rq75 + 1.5 * riqr)]

    # Compute Q(0.25), Q(0.75) and IQR for interval length
    len_values = [x[1] - x[0] for x in right_filtered]
    lenq25, lenq75 = np.percentile(len_values, [25, 75])
    leniqr = lenq75 - lenq25

    # Outlier processing for interval length
    len_filtered = [x if (lenq25 - 1.5 * leniqr) <= x <= (lenq75 + 1.5 * leniqr) else None for x in len_values]
    selectors = [x is not None for x in len_filtered]
    filtered_intervals = list(itertools.compress(right_filtered, selectors))
    return filtered_intervals
Project: type2-fuzzy    Author: h4iku    | Project source | File source
def tolerance_limit_processing(intervals):
    """Tolerance limit processing"""

    left = [x[0] for x in intervals]
    right = [x[1] for x in intervals]
    mean_left = np.mean(left)
    std_left = np.std(left, ddof=1)
    mean_right = np.mean(right)
    std_right = np.std(right, ddof=1)

    limits = [32.019, 32.019, 8.380, 5.369, 4.275, 3.712, 3.369, 3.136, 2.967, 2.839,
        2.737, 2.655, 2.587, 2.529, 2.48, 2.437, 2.4, 2.366, 2.337, 2.31, 2.31, 2.31,
        2.31, 2.31, 2.208]
    k = limits[min(len(left) - 1, 24)]

    # Tolerance limit processing for Left and Right bounds
    left_filtered = [x for x in intervals if (mean_left - k * std_left) <= x[0] <= (mean_left + k * std_left)]
    right_filtered = [x for x in left_filtered if (mean_right - k * std_right) <= x[1] <= (mean_right + k * std_right)]

    # Tolerance limit processing for interval length
    len_values = [x[1] - x[0] for x in right_filtered]
    mean_len = np.mean(len_values)
    std_len = np.std(len_values, ddof=1)

    if std_len != 0:
        k = min(k, mean_len / std_len, (100 - mean_len) / std_len)

    len_filtered = [x if (mean_len - k * std_len) <= x <= (mean_len + k * std_len) else None for x in len_values]
    selectors = [x is not None for x in len_filtered]
    filtered_intervals = list(itertools.compress(right_filtered, selectors))
    return filtered_intervals
Project: pybotics    Author: nnadeau    | Project source | File source
def optimization_vector(self) -> np.ndarray:
        """
        Get the values of parameters being optimized.

        :return: optimization parameter values
        """
        filtered_iterator = compress(self.vector, self.optimization_mask)
        optimization_vector = np.array(list(filtered_iterator))
        return optimization_vector
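
For comparison, when the parameter vector is already a NumPy array, boolean indexing with the mask gives the same result as compress(); the values below are illustrative and not pybotics' actual parameters:

import numpy as np
from itertools import compress

vector = np.array([0.1, 0.2, 0.3, 0.4])
mask = [True, False, True, False]

via_compress = np.array(list(compress(vector, mask)))
via_indexing = vector[np.asarray(mask)]
print(np.array_equal(via_compress, via_indexing))  # True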
Project: pybotics    Author: nnadeau    | Project source | File source
def optimization_vector(self) -> np.ndarray:
        """
        Return the values of parameters being optimized.

        :return: optimization parameter values
        """
        filtered_iterator = compress(self.vector(), self.optimization_mask)
        vector = np.array(list(filtered_iterator))
        return vector
Project: Thrifty    Author: swkrueger    | Project source | File source
def filter_duplicates(detections):
    """Return detections with duplicates and unidentified detections removed,
    sorted by timestamp."""
    mask = identify_duplicates(detections)
    filtered = list(itertools.compress(detections, mask))
    filtered.sort(key=lambda x: x.timestamp)
    return filtered
Project: Thrifty    Author: swkrueger    | Project source | File source
def make_detection_extractor(detections, matches):
    rxpair_detections = collections.defaultdict(list)
    for group in matches:
        for det0_id, det1_id in itertools.combinations(group, 2):
            det0 = detections[det0_id]
            det1 = detections[det1_id]
            if det0.rxid > det1.rxid:
                det0, det1 = det1, det0
            rxpair_detections[(det0.rxid, det1.rxid)].append((det0, det1))

    timestamps = {}
    for pair, detections in rxpair_detections.iteritems():
        detections.sort(key=lambda d: d[0].timestamp)
        timestamps[pair] = [d[0].timestamp for d in detections]

    def extract(rxid0, rxid1, timestamp_start, timestamp_stop):
        assert rxid0 < rxid1
        pair = (rxid0, rxid1)
        left = bisect_left(timestamps[pair], timestamp_start)
        right = bisect_right(timestamps[pair], timestamp_stop)
        detection_pairs = rxpair_detections[pair][left:right]

        if len(detection_pairs) > 1:
            sdoa = np.array([d[0].soa - d[1].soa for d in detection_pairs])
            is_outlier = stat_tools.is_outlier(sdoa)
            detection_pairs = list(itertools.compress(detection_pairs,
                                                      ~is_outlier))

        return detection_pairs

    return extract
Project: TFG    Author: BraulioV    | Project source | File source
def split_in_pairs(split_list):
    """
    Input: ["Element1", "Element2", "Element3", "Element4"]
    Output: (["Element1", "Element3"], ["Element2", "Element4"])
    """
    def compress_elements(split_list, elements, times):
        return compress(split_list, chain.from_iterable(repeat(elements, times)))

    n_times = len(split_list) // 2
    return compress_elements(split_list, [1,0], n_times), compress_elements(split_list, [0,1], n_times)
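
Note that compress() returns lazy iterators, so the output shown in the docstring is obtained only after materialising the two returned objects, for example with list(). A small usage sketch, assuming split_in_pairs is importable as defined above:

left, right = split_in_pairs(["Element1", "Element2", "Element3", "Element4"])
print(list(left))   # ['Element1', 'Element3']
print(list(right))  # ['Element2', 'Element4']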


# separate Class names and file names in two different lists
Project: TFG    Author: BraulioV    | Project source | File source
def assign_lab_hours(self):
        for group, it in zip(self.groups.values(), range(len(self.groups.items()))):
            # get subjects and its practical hours
            subject_list = self.__get_subj_list__(group)
            shuffle(subject_list)

            subject_list = self.recalculate_subjects(subject_list, group.numsubgroups)

            # compute range of shift
            if group.shift == 'M':
                start_range, end_range = 0, self.time_table.shape[1] // 2
            else:
                start_range, end_range = self.time_table.shape[1] // 2, self.time_table.shape[1]

            # compute the index
            subjects_index = [i for i in range(group.numsubgroups)]

            days_week = self.structure.shape[2]
            # compute the total lab hours, for each subject
            hours = list(map(lambda x: x*group.numsubgroups, [subject.practical_hours if type(subject) is not tuple
                     else subject[0].practical_hours + subject[1].practical_hours
                     for subject in subject_list]))
            # start loop
            for hour in range(start_range, end_range, 2):
                for day in range(days_week):
                    # if the cell is a lab cell, let's fill it
                    if (self.structure[it, hour, day] == 'L' or self.structure[it, hour, day] == 'E')\
                            and sum(compress(hours, map(lambda x: x in subjects_index, range(len(hours))))) > 0:
                        cell1, cell2 = self.compute_best_cells(group, subject_list, subjects_index, hours, hour, day)
                        self.time_table[it, hour, day] = cell1
                        self.time_table[it, hour + 1, day] = cell2

                        subjects_index = list(map(lambda x: (x + 1) % len(subject_list), subjects_index))
                if sum(hours) == 0: break
Project: bigfishtrader    Author: xingetouzi    | Project source | File source
def can_trade(self, *codes):
        if len(codes):
            return list(compress(codes, [self.cache.client.sismember('index', code) for code in codes]))
        else:
            return list(self.cache.client.smembers('index'))
Project: ML-Predictions    Author: ltfschoen    | Project source | File source
def setup_training_columns(self):
        """ Return array of Training Columns.

        When "training_columns" array is empty it means return all columns except the "target_column"
        """

        training_columns = self.prediction_config.DATASET_LOCATION[self.dataset_choice]["training_columns"]

        if not training_columns and not isinstance(self.df_listings, type(None)):
            features = self.df_listings.columns.tolist()

            # Remove "target_column" (if already in the dataset, as may not yet have been generated by Clustering)
            if self.target_column in features:
                features.remove(self.target_column)

            # Remove columns containing Excluded full text
            for index, column_name in enumerate(self.prediction_config.EXCLUDE_TRAINING_COLUMNS_WITH_FULL_TEXT):
                if column_name in features:
                    features.remove(column_name)

            # Retain columns that do not contain Excluded partial text
            is_features_to_retain = [False] * len(features)
            for idx_outer, column_partial_name in enumerate(self.prediction_config.EXCLUDE_TRAINING_COLUMNS_WITH_PARTIAL_TEXT):
                for idx_inner, column_name in enumerate(features):
                    if column_partial_name not in column_name:
                        is_features_to_retain[idx_inner] = True
            filtered = list(compress(features, is_features_to_retain))
            return filtered
        else:
            return training_columns
Project: Mac-Python-3.X    Author: L1nwatch    | Project source | File source
def data_deal_function():
    # compress() filters the data in the first iterable using a second iterable of selectors:
    # only the items whose corresponding selector evaluates to True are yielded.
    # itertools.filterfalse() is the complement of the built-in filter(): it yields the items
    # for which the predicate returns False instead of True.
    for item in it.compress([1, 2, 3, 4, 5], [False, True, False, 0, 1]):
        print(item)

    # dropwhile() and takewhile() both consume an iterable under the control of a predicate.
    # dropwhile() discards items until the predicate first returns False, then yields everything that follows;
    # takewhile() yields items only while the predicate returns True and stops at the first False.
    def __single_digit(n):
        return n < 10

    for n in it.dropwhile(__single_digit, range(20)):
        print(n, end=" ")
    for n in it.takewhile(__single_digit, range(20)):
        print(n, end=" ")

    # accumulate() yields the running results of applying a binary function to the input (addition by default).
    # For [1, 2, 3, 4] it yields 1, 3, 6, 10. It is similar to functools.reduce(), which returns only the final value.
    for n in it.accumulate([1, 2, 3, 4, ]):
        print(n, end=" ")
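
To make the contrast drawn in the comments concrete, here is a small side-by-side sketch of compress() versus filter() and itertools.filterfalse(), with illustrative values only:

import itertools as it

values = [1, 2, 3, 4, 5]

# compress() takes a parallel iterable of selectors ...
print(list(it.compress(values, [0, 1, 0, 1, 1])))          # [2, 4, 5]

# ... while filter() / filterfalse() apply a predicate to each item.
print(list(filter(lambda n: n % 2 == 0, values)))          # [2, 4]
print(list(it.filterfalse(lambda n: n % 2 == 0, values)))  # [1, 3, 5]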
Project: eclipse2017    Author: google    | Project source | File source
def assemble(self, fnames):
        """
        Stitches together movies from an ordered list of filenames.
        Downloads new files from GCS then feeds files to ffmpeg.
        Returns list of files successfully stitched into movie & calls stats func
        """

        # Get files from GCS
        pool = Pool(min(len(fnames), constants.MOVIE_DAEMON_MAX_PROCESSES))
        results = pool.map(get_file_from_gcs, fnames)
        pool.terminate()

        # Start ffmpeg subprocess
        ffmpeg_cmd = ["ffmpeg", "-y",       # Overwrite existing movie file
                    "-f", "image2pipe",
                    "-framerate", constants.MOVIE_FRAMERATE,
                    "-vcodec","mjpeg",
                    "-i", "-",              # Input pipe from stdin
                    "-vf", "scale=1024:-1",
                    "-loglevel", "panic",
                    "-vcodec", "libx264",
                    constants.MOVIE_FPATH]

        ffmpeg_ps = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)


        fnames = list(compress(fnames, results))
        files_read = self._pipe_to_ffmpeg(ffmpeg_ps, fnames)

        if files_read > constants.MOVIE_MIN_FRAMES:
            ffmpeg_ps.stdin.close()
            ffmpeg_ps.wait()
        else:
            ffmpeg_ps.kill()

        return fnames
Project: raccoon    Author: rsheftel    | Project source | File source
def select_index(self, compare, result='boolean'):
        """
        Finds the elements in the index that match the compare parameter and returns either a list of the values that
        match, or a boolean list the length of the index with True for each index that matches. If the indexes are
        tuples then the compare is a tuple where None in any field of the tuple will be treated as "*" and match all
        values.

        :param compare: value to compare as a singleton or tuple
        :param result: 'boolean' = returns a list of booleans, 'value' = returns a list of index values that match
        :return: list of booleans or values
        """
        if isinstance(compare, tuple):
            # this crazy list comprehension will match all the tuples in the list with None being an * wildcard
            booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)])
                        for x, v in enumerate(self._index)]
        else:
            booleans = [False] * len(self._index)
            if self._sort:
                booleans[sorted_index(self._index, compare)] = True
            else:
                booleans[self._index.index(compare)] = True
        if result == 'boolean':
            return booleans
        elif result == 'value':
            return list(compress(self._index, booleans))
        else:
            raise ValueError('only valid values for result parameter are: boolean or value.')
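
The wildcard matching above is plain Python and can be sketched independently of raccoon's DataFrame; the simplified version below uses illustrative index values and the same idea of treating None as a "*" wildcard:

from itertools import compress

index = [('a', 1), ('a', 2), ('b', 1)]
compare = ('a', None)                        # None matches any value in that position

booleans = [all(c == w for c, w in zip(compare, v) if c is not None)
            for v in index]
print(booleans)                              # [True, True, False]
print(list(compress(index, booleans)))       # [('a', 1), ('a', 2)]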
Project: raccoon    Author: rsheftel    | Project source | File source
def get_rows(self, indexes, column, as_list=False):
        """
        For a list of indexes and a single column name return the values of the indexes in that column.

        :param indexes: either a list of index values or a list of booleans with same length as all indexes
        :param column: single column name
        :param as_list: if True return a list, if False return DataFrame
        :return: DataFrame if as_list is False, a list if as_list is True
        """
        c = self._columns.index(column)
        if all([isinstance(i, bool) for i in indexes]):  # boolean list
            if len(indexes) != len(self._index):
                raise ValueError('boolean index list must be same size of existing index')
            if all(indexes):  # the entire column
                data = self._data[c]
                index = self._index
            else:
                data = list(compress(self._data[c], indexes))
                index = list(compress(self._index, indexes))
        else:  # index values list
            locations = [sorted_index(self._index, x) for x in indexes] if self._sort \
                else [self._index.index(x) for x in indexes]
            data = [self._data[c][i] for i in locations]
            index = [self._index[i] for i in locations]
        return data if as_list else DataFrame(data={column: data}, index=index, index_name=self._index_name,
                                              sort=self._sort)
Project: raccoon    Author: rsheftel    | Project source | File source
def get_matrix(self, indexes, columns):
        """
        For a list of indexes and list of columns return a DataFrame of the values.

        :param indexes: either a list of index values or a list of booleans with same length as all indexes
        :param columns: list of column names
        :return: DataFrame
        """
        if all([isinstance(i, bool) for i in indexes]):  # boolean list
            is_bool_indexes = True
            if len(indexes) != len(self._index):
                raise ValueError('boolean index list must be same size of existing index')
            bool_indexes = indexes
            indexes = list(compress(self._index, indexes))
        else:
            is_bool_indexes = False
            locations = [sorted_index(self._index, x) for x in indexes] if self._sort \
                else [self._index.index(x) for x in indexes]

        if all([isinstance(i, bool) for i in columns]):  # boolean list
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))

        col_locations = [self._columns.index(x) for x in columns]
        data_dict = dict()

        for c in col_locations:
            data_dict[self._columns[c]] = list(compress(self._data[c], bool_indexes)) if is_bool_indexes \
                else [self._data[c][i] for i in locations]

        return DataFrame(data=data_dict, index=indexes, columns=columns, index_name=self._index_name,
                         sort=self._sort)
Project: raccoon    Author: rsheftel    | Project source | File source
def get_location(self, location, columns=None, as_dict=False, index=True):
        """
        For an index location and list of columns return a DataFrame of the values. This is optimized for speed because
        it does not need to look up the index location with a search. It also accepts relative indexing from the end of
        the DataFrame in standard python notation [-3, -2, -1]

        :param location: index location in standard python form of positive or negative number
        :param columns: list of columns, or None to include all columns
        :param as_dict: if True then return a dictionary
        :param index: if True then include the index in the dictionary if as_dict=True
        :return: DataFrame or dictionary
        """
        if columns is None:
            columns = self._columns
        elif all([isinstance(i, bool) for i in columns]):
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))
        data = dict()
        for column in columns:
            c = self._columns.index(column)
            data[column] = self._data[c][location]
        index_value = self._index[location]
        if as_dict:
            if index:
                data[self._index_name] = index_value
            return data
        else:
            data = {k: [data[k]] for k in data}  # this makes the dict items lists
            return DataFrame(data=data, index=[index_value], columns=columns, index_name=self._index_name,
                             sort=self._sort)
Project: raccoon    Author: rsheftel    | Project source | File source
def get_slice(self, start_index=None, stop_index=None, columns=None, as_dict=False):
        """
        For sorted DataFrames will return either a DataFrame or dict of all of the rows where the index is greater than
        or equal to the start_index if provided and less than or equal to the stop_index if provided. If either the
        start or stop index is None then it will include from the first or last element, similar to a standard python
        slice of [:5] or [5:]. Both end points are considered inclusive.

        :param start_index: lowest index value to include, or None to start from the first row
        :param stop_index: highest index value to include, or None to end at the last row
        :param columns: list of column names to include, or None for all columns
        :param as_dict: if True then return a tuple of (list of index, dict of column names: list data values)
        :return: DataFrame or tuple
        """
        if not self._sort:
            raise RuntimeError('Can only use get_slice on sorted DataFrames')

        if columns is None:
            columns = self._columns
        elif all([isinstance(i, bool) for i in columns]):
            if len(columns) != len(self._columns):
                raise ValueError('boolean column list must be same size of existing columns')
            columns = list(compress(self._columns, columns))

        start_location = bisect_left(self._index, start_index) if start_index is not None else None
        stop_location = bisect_right(self._index, stop_index) if stop_index is not None else None

        index = self._index[start_location:stop_location]
        data = dict()
        for column in columns:
            c = self._columns.index(column)
            data[column] = self._data[c][start_location:stop_location]

        if as_dict:
            return index, data
        else:
            data = data if data else None  # if the dict is empty, convert to None
            return DataFrame(data=data, index=index, columns=columns, index_name=self._index_name, sort=self._sort,
                             use_blist=self._blist)
Project: raccoon    Author: rsheftel    | Project source | File source
def select_index(self, compare, result='boolean'):
        """
        Finds the elements in the index that match the compare parameter and returns either a list of the values that
        match, or a boolean list the length of the index with True for each index that matches. If the indexes are
        tuples then the compare is a tuple where None in any field of the tuple will be treated as "*" and match all
        values.

        :param compare: value to compare as a singleton or tuple
        :param result: 'boolean' = returns a list of booleans, 'value' = returns a list of index values that match
        :return: list of booleans or values
        """
        if isinstance(compare, tuple):
            # this crazy list comprehension will match all the tuples in the list with None being an * wildcard
            booleans = [all([(compare[i] == w if compare[i] is not None else True) for i, w in enumerate(v)])
                        for x, v in enumerate(self._index)]
        else:
            booleans = [False] * len(self._index)
            if self._sort:
                booleans[sorted_index(self._index, compare)] = True
            else:
                booleans[self._index.index(compare)] = True
        if result == 'boolean':
            return booleans
        elif result == 'value':
            return list(compress(self._index, booleans))
        else:
            raise ValueError('only valid values for result parameter are: boolean or value.')
Project: Modern-Python-Cookbook    Author: PacktPublishing    | Project source | File source
def pass_outliers(data):
    return itertools.compress(data, (z >= 3.5 for z in z_mod(data)))
Project: Modern-Python-Cookbook    Author: PacktPublishing    | Project source | File source
def reject_outliers(data):
    return itertools.compress(data, (z < 3.5 for z in z_mod(data)))
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def prediction(x_sample, y_sample): # sample has size 20
            '''
            Get the perplexity of the test set
            '''

            perplist = []
            for i in range(20):
                x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_sample[0]))]
                feed_dict = {nvdm.input_x: x_sample[i].reshape(1,10000)}
                step, p_xi_h = sess.run([nvdm.global_step, nvdm.p_xi_h], feed_dict)

                valid_p = np.mean(np.log(p_xi_h[x_batch_id]))
                perplist.append(valid_p)
            print("perplexity: {}".format(np.exp(-np.mean(perplist))))
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def train_step(x_batch, y_batch, epoch,predicts,labels):
            """
            A single training step
            """
            y_batch = y_batch.reshape(1,-1)
            x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_batch[0]))]
            feed_dict = {nvdm.input_x: x_batch,
                         nvdm.input_y:y_batch,
                         nvdm.x_id: x_batch_id}
            '''
            h1b = [v for v in tf.all_variables() if v.name == "h1/b:0"][0]
            h1w = [v for v in tf.all_variables() if v.name == "h1/w:0"][0]
            _, step, summaries, loss, kl, rc, p_xi_h, R, hb, hw, e  = sess.run(
                [nvdm.train_op, global_step, loss_summary, nvdm.loss, nvdm.KL, nvdm.recon_loss, nvdm.p_xi_h, nvdm.R, h1b, h1w, nvdm.e], feed_dict)
            '''

            _, step,  loss,predict = sess.run([nvdm.train_op, nvdm.global_step, nvdm.loss,nvdm.predicts], feed_dict)


            time_str = datetime.datetime.now().isoformat()
            if step % FLAGS.train_every == 0:
                import pdb
                pdb.set_trace()


                score = f1_score_multiclass(np.array(predicts),np.array(labels))
                print("time: {},  epoch: {}, step: {}, loss: {:g}, score: {:g}".format(time_str,epoch, step, loss,score))

                return [],[]


            predicts.append(predict)
            labels.append(y_batch[0].astype(int))

            if np.isnan(loss):
                import pdb
                pdb.set_trace()

            #train_summary_writer.add_summary(summaries, step)

            return predicts, labels
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def prediction(x_sample, y_sample): # sample has size 20
            '''
            Get the perplexity of the test set
            '''
            perplist = []
            for i in range(20):
                x_batch_id = [ _ for _ in itertools.compress(range(10000), map(lambda x: x>0,x_sample[0]))]
                feed_dict = {nvdm.input_x: x_sample[i].reshape(1,10000),
                             nvdm.input_y: y_sample[i].reshape(1,103)}
                step, p_xi_h = sess.run([nvdm.global_step, nvdm.p_xi_h], feed_dict)

                valid_p = np.mean(np.log(p_xi_h[x_batch_id]))
                perplist.append(valid_p)
            print("perplexity: {}".format(np.exp(-np.mean(perplist))))
Project: NVDM-For-Document-Classification    Author: cryanzpj    | Project source | File source
def train(self, X_train, y_train):
        #self.saver.restore(self.sess, "./imdbmodel/model.ckpt")
        total_batch = X_train.shape[0] // self.batch_size 
        for e in range(self.epoch):
            perplist = []
            for i in range(total_batch):
                X_batch = X_train[i*self.batch_size:(i+1)*self.batch_size]
                y_batch = y_train[i*self.batch_size:(i+1)*self.batch_size]
                x_batch_id = [_ for _ in itertools.compress(range(self.feature_size), map(lambda x : x>0, X_batch[0].toarray()[0]))]
                feed_dict = {
                        self.input_x : X_batch.toarray(),
                        self.input_y : np.reshape(y_batch, [-1,1]),
                        self.x_id : x_batch_id
                        }
                _, loss =  self.sess.run([
                            self.train_op, 
                            self.loss], feed_dict)
                if np.isnan(loss):
                    import pdb
                    pdb.set_trace()
                if i % self.display_score == 0:
                    p_xi_h = self.sess.run([self.p_xi_h], feed_dict)
                    valid_p = np.mean(np.log(p_xi_h[0][x_batch_id]))
                    perplist.append(valid_p)
                    print("step: {}, perp: {:f}".format(i, np.exp(-np.mean(perplist))))
            # save model every epoch
                if i > 0 and i % 2000 == 0:
                    self.savemodel()
Project: MetaHeuristic    Author: gonzalesMK    | Project source | File source
def _evaluate(self, individual, X, y, cv=3):
        """ Evaluate method

        Parameters
        ----------
        individual: list [n_features]
                The input individual to be evaluated

        Return
        ----------
        Score of the individual : tuple(cross_val_score, feature score)
        """
        # Select Features
        features = list(compress(range(len(individual)), individual))
        train = np.reshape([X[:, i] for i in features],
                           [len(features), len(X)]).T

        if train.shape[1] == 0:
            return 0,1,

        # Applying K-Fold Cross Validation
        accuracies = cross_val_score(estimator=clone(self.estimator), X=train, 
                                     y=y, cv=cv, 
                                     scoring=self.cv_metric_function)

        if self.features_metric_function is None:
            feature_score = pow(sum(individual)/(len(individual)*5), 2)
        else:
            feature_score = self.features_metric_function(individual)

        return accuracies.mean() - accuracies.std(), feature_score
Project: MetaHeuristic    Author: gonzalesMK    | Project source | File source
def _evaluate(self, individual, X, y, cv=3):
        """ Evaluate method

        Parameters
        ----------
        individual: list [n_features]
                The input individual to be evaluated

        Return
        ----------
        Score of the individual : tuple(cross_val_score, feature score)
        """
        # Select Features
        features = list(compress(range(len(individual)), individual))
        train = np.reshape([X[:, i] for i in features],
                           [len(features), len(X)]).T

        if train.shape[1] == 0:
            return 0,1,

        # Applying K-Fold Cross Validation
        accuracies = cross_val_score(estimator=clone(self.estimator), X=train, 
                                     y=y, cv=cv, 
                                     scoring=self.cv_metric_function)

        if self.features_metric_function == "log" :
            feature_score = np.log10(9*(sum(individual)/len(individual))+1) 
        elif self.features_metric_function == "poly" :
            feature_score = sum(individual)/len(individual)
        else:
            raise ValueError('Unknown evaluation')

        return accuracies.mean() - accuracies.std(), feature_score
Project: tensorflow-playground    Author: wangz10    | Project source | File source
def generate_batch_pvdm(doc_ids, word_ids, batch_size, window_size):
    '''
    Batch generator for PV-DM (Distributed Memory Model of Paragraph Vectors).
    batch should be a shape of (batch_size, window_size+1)

    Parameters
    ----------
    doc_ids: list of document indices 
    word_ids: list of word indices
    batch_size: number of words in each mini-batch
    window_size: number of leading words before the target word 
    '''
    global data_index
    assert batch_size % window_size == 0
    batch = np.ndarray(shape=(batch_size, window_size + 1), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = window_size + 1
    buffer = collections.deque(maxlen=span) # used for collecting word_ids[data_index] in the sliding window
    buffer_doc = collections.deque(maxlen=span) # collecting id of documents in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    mask = [1] * span
    mask[-1] = 0 
    i = 0
    while i < batch_size:
        if len(set(buffer_doc)) == 1:
            doc_id = buffer_doc[-1]
            # all leading words and the doc_id
            batch[i, :] = list(compress(buffer, mask)) + [doc_id]
            labels[i, 0] = buffer[-1] # the last word at end of the sliding window
            i += 1
        # move the sliding window  
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    return batch, labels
Project: tensorflow-playground    Author: wangz10    | Project source | File source
def generate_batch_cbow(data, batch_size, num_skips, skip_window):
    '''
    Batch generator for CBOW (Continuous Bag of Words).
    batch should be a shape of (batch_size, num_skips)

    Parameters
    ----------
    data: list of index of words
    batch_size: number of words in each mini-batch
    num_skips: number of surrounding words in both directions (2: one word ahead and one word following)
    skip_window: number of words at both ends of a sentence to skip (1: skip the first and last word of a sentence)
    '''
    global data_index
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_window
    batch = np.ndarray(shape=(batch_size, num_skips), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = 2 * skip_window + 1 # [ skip_window target skip_window ]
    buffer = collections.deque(maxlen=span) # used for collecting data[data_index] in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    # move the sliding window  
    for i in range(batch_size):
        mask = [1] * span
        mask[skip_window] = 0 
        batch[i, :] = list(compress(buffer, mask)) # all surrounding words
        labels[i, 0] = buffer[skip_window] # the word at the center 
        buffer.append(data[data_index])
        data_index = (data_index + 1) % len(data)
    return batch, labels
Project: faampy    Author: ncasuk    | Project source | File source
def simplified(self):
        """
        Returns the reduced list of coordinates
        """
        if not self.Simple_mask:
            self._simplify_()
        return list(itertools.compress(self, self.Simple_mask))
Project: asynq    Author: quora    | Project source | File source
def afilter(function, sequence):
    """Equivalent of filter() that takes an async filter function.

    Returns a list.

    """
    if function is None:
        result(filter(None, sequence)); return
    should_include = yield [function.asynq(elt) for elt in sequence]
    result(list(itertools.compress(sequence, should_include))); return
Project: asynq    Author: quora    | Project source | File source
def afilterfalse(function, sequence):
    """Equivalent of itertools.ifilterfalse() that takes an async filter function.

    Returns a list.

    """
    should_exclude = yield [function.asynq(elt) for elt in sequence]
    should_include = [not res for res in should_exclude]
    result(list(itertools.compress(sequence, should_include))); return
Project: open-database    Author: mitaffinity    | Project source | File source
def retrieve(self, table, cols, col_rules):
        """ Retrieves column values from a single table based on a given filtering rule.

        Example:
        <pre lang="python">
        my_db.retrieve(some_table_table,["num1","num2"],{"remainder_div_3":"{}==1 or {}==2", "sum":"{}<200"})
        </pre>
        will retrieve:
        <pre lang="python">
        columns "num1" and "num2" from the table, for rows whose "remainder_div_3" column is 1 or 2 and whose
        "sum" column is less than 200. All column rules are combined with an "AND" statement.
        </pre>

        :param table: string (name of the table to retrieve from)
        :param cols: list of strings (names of the columns to retrieve)
        :param col_rules: dictionary of rules that will be evaluated
        :return: 
        Nested list in which each entry is a row of the requested column values that passed the filter rules
        """
        # todo: add string comp support
        cursor = self.conn.cursor()

        # from the table get all the columns to retrieve
        sql_cmd = "select " + " ,".join(cols) + " from \"" + table + "\""
        cursor.execute(sql_cmd)
        sel_sets = cursor.fetchall()

        if len(col_rules)==0:
            sel_vals = sel_sets
        else:
            # from the table select all the columns to filter for
            sql_cmd = "select " + ", ".join([key for key in col_rules]) + " from \"" + table + "\""
            cursor.execute(sql_cmd)
            filter_sets = cursor.fetchall()

            # repeat every argument number of times it appears in the selection
            mult = [len(re.findall("{}", col_rules[key])) for key in col_rules]

            def _repeat_vals(vals, repeats):
                rep_vals = []
                [[rep_vals.append(vals[i]) for _ in range(repeats[i])] for i in range(len(col_rules))]
                return rep_vals
            filter_sets = [_repeat_vals(set, mult) for set in filter_sets]

            # evaluate every row to get a boolean mask of examples
            rule_tmp = "(" + ") and (".join([col_rules[key] for key in col_rules]) + ")"
            sel_mask = [eval(rule_tmp.format(*val_set)) for val_set in filter_sets]

            # apply a boolean mask to take only entries that fit the selection rule
            sel_sets = list(compress(sel_sets, sel_mask))
            sel_vals = sel_sets
            #sel_vals = [list(x) for x in zip(*sel_sets)]
        return sel_vals
Project: pyshtrih    Author: oleg-golovanov    | Project source | File source
def handle_fr_flags(arg):
    def get_keys(revision):
        return (
            (u'??????????? ???????? ??????????', u'????? ???????? ??????')[revision],
            u'???? ????? ?????????',
            (u'????? ?????? ??????? ????????', u'?????? ?? ?????? ?? ??????????')[revision],
            (u'????? ??????? ??????? ????????', u'?????? ?? ????? ? ?????????', u'?????? ????????')[revision],
            u'???????? ????',
            u'?????? ??????? ??',
            u'????? ???????????? ??????? ?????',
            u'????? ???????????? ??????????? ?????',
            u'?????????? ?????? ??????? ?????',
            u'?????????? ?????? ????????????? ???????',
            u'????',
            u'????????? ?????????? ?????',
            u'?????? ?????? ??????????? ?????????',
            u'??????? ?????? ??????????? ?????????',
            u'????? ??????? ?????',
            u'????? ????????????? ???????'
        )

    bits = misc.int_to_bits(arg, 16)

    a, b, c = 0, 1, 2
    flags_actual = {
        # ?????-??-?
        4: ((0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1), a),
        # ?????-?????-??-?
        9: ((0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0), a),
        # ?????-?????-??-? (?????? 02)
        12: ((0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0), a)
    }

    flags, rev = flags_actual.get(
        handle_fr_flags.model,
        ((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), a)
    )

    return dict(
        zip(
            itertools.compress(get_keys(rev), flags),
            itertools.compress(bits, flags)
        )
    )
Project: tensorflow-playground    Author: wangz10    | Project source | File source
def generate_batch_pvdm(batch_size, window_size):
    '''
    Batch generator for PV-DM (Distributed Memory Model of Paragraph Vectors).
    batch should be a shape of (batch_size, window_size+1)

    Parameters
    ----------
    batch_size: number of words in each mini-batch
    window_size: number of leading words before the target word
    '''
    global data_index
    assert batch_size % window_size == 0
    batch = np.ndarray(shape=(batch_size, window_size + 1), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    span = window_size + 1
    buffer = collections.deque(maxlen=span) # used for collecting word_ids[data_index] in the sliding window
    buffer_doc = collections.deque(maxlen=span) # collecting id of documents in the sliding window
    # collect the first window of words
    for _ in range(span):
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    mask = [1] * span
    mask[-1] = 0 
    i = 0
    while i < batch_size:
        if len(set(buffer_doc)) == 1:
            doc_id = buffer_doc[-1]
            # all leading words and the doc_id
            batch[i, :] = list(compress(buffer, mask)) + [doc_id]
            labels[i, 0] = buffer[-1] # the last word at end of the sliding window
            i += 1
            # print buffer
            # print list(compress(buffer, mask))
        # move the sliding window  
        buffer.append(word_ids[data_index])
        buffer_doc.append(doc_ids[data_index])
        data_index = (data_index + 1) % len(word_ids)

    return batch, labels

## examining the batch generator function