Python torch.utils.data module: DataLoader() usage examples

The following code examples, extracted from open-source Python projects, illustrate how to use torch.utils.data.DataLoader().
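As a baseline for the excerpts below, here is a minimal, self-contained sketch (not taken from any of the projects) of the TensorDataset + DataLoader pattern most of them follow; the shapes are toy values:

import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

# Toy data: 100 samples with 8 features each, plus integer class labels.
features = torch.from_numpy(np.random.randn(100, 8)).float()
labels = torch.from_numpy(np.random.randint(0, 3, size=100))

dataset = TensorDataset(features, labels)      # pairs (features[i], labels[i])
loader = DataLoader(dataset, batch_size=16, shuffle=True)

for batch_features, batch_labels in loader:    # yields shuffled mini-batches
    print(batch_features.shape, batch_labels.shape)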

Project: pathnet-pytorch | Author: kimhc6028
def convert2tensor(self, dataset, batch_size, limit):
        data = dataset['data']
        data = data[:limit]
        print("normalizing images...")
        data = common.normalize(data)
        print("done")
        target = dataset['labels']
        target = target[:limit]
        target = np.asarray(target)

        tensor_data = torch.from_numpy(data)
        tensor_data = tensor_data.float()
        tensor_target = torch.from_numpy(target)

        dataset = data_utils.TensorDataset(tensor_data, tensor_target)
        loader = data_utils.DataLoader(dataset, batch_size=batch_size, shuffle=True)
        return loader
Project: pathnet-pytorch | Author: kimhc6028
def convert2tensor(self, dataset, batch_size, limit):
        b_data = dataset['X']
        b_data = b_data[:limit]
        print("normalizing images...")
        b_data = common.normalize(b_data)
        print("done")
        target = dataset['y']
        target = target.reshape((len(target)))
        target = target[:limit]
        """SVHN dataset is between 1 to 10: shift this to 0 to 9 to fit with neural network"""
        target = target - 1

        data = []
        for i in range(len(target)):
            data.append(b_data[:,:,:,i])
        data = np.asarray(data)
        tensor_data = torch.from_numpy(data)
        tensor_data = tensor_data.float()
        tensor_target = torch.from_numpy(target)

        dataset = data_utils.TensorDataset(tensor_data, tensor_target)
        loader = data_utils.DataLoader(dataset, batch_size=batch_size, shuffle=True)
        return loader
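A note on the loop above: SVHN's .mat format stores images as an (H, W, C, N) array, and the loop just moves the sample axis to the front. Assuming the same b_data array, a vectorized equivalent would be:

data = np.transpose(b_data, (3, 0, 1, 2))  # (H, W, C, N) -> (N, H, W, C)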
Project: KagglePlanetPytorch | Author: Mctigger
def get_loader(df, transformations):
    dset_val = KaggleAmazonJPGDataset(df, paths.train_jpg, transformations, divide=False)

    loader_val = DataLoader(dset_val,
                            batch_size=batch_size,
                            num_workers=12,
                            pin_memory=True)
    return loader_val
Project: pyro | Author: uber
def get_data_loader(dataset_name,
                    batch_size=1,
                    dataset_transforms=None,
                    is_training_set=True,
                    shuffle=True):
    if not dataset_transforms:
        dataset_transforms = []
    trans = transforms.Compose([transforms.ToTensor()] + dataset_transforms)
    dataset = getattr(datasets, dataset_name)
    return DataLoader(
        dataset(root=DATA_DIR,
                train=is_training_set,
                transform=trans,
                download=True),
        batch_size=batch_size,
        shuffle=shuffle
    )
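Because the dataset class is resolved with getattr(datasets, dataset_name), any torchvision dataset with the standard (root, train, transform, download) constructor fits. A hypothetical call:

train_loader = get_data_loader('MNIST', batch_size=64, is_training_set=True)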
Project: SimGAN_pytorch | Author: AlexHex7
def load_data(self):
        print('=' * 50)
        print('Loading data...')
        transform = transforms.Compose([
            transforms.ImageOps.grayscale,
            transforms.Scale((cfg.img_width, cfg.img_height)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        syn_train_folder = torchvision.datasets.ImageFolder(root=cfg.syn_path, transform=transform)
        # print(syn_train_folder)
        self.syn_train_loader = Data.DataLoader(syn_train_folder, batch_size=cfg.batch_size, shuffle=True,
                                                pin_memory=True)
        print('syn_train_batch %d' % len(self.syn_train_loader))

        real_folder = torchvision.datasets.ImageFolder(root=cfg.real_path, transform=transform)
        # real_folder.imgs = real_folder.imgs[:2000]
        self.real_loader = Data.DataLoader(real_folder, batch_size=cfg.batch_size, shuffle=True,
                                           pin_memory=True)
        print('real_batch %d' % len(self.real_loader))
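A compatibility note on the transform pipeline above: transforms.ImageOps resolves only because early torchvision re-exported PIL's ImageOps from its transforms module, and transforms.Scale has since been renamed transforms.Resize. On current torchvision the equivalent pipeline would look roughly like this (a sketch; cfg comes from the snippet above):

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # keep 3 channels so the 3-value Normalize still applies
    transforms.Resize((cfg.img_width, cfg.img_height)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])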
Project: two-stream-action-recognition | Author: jeffreyhuang1
def train(self):
        training_set = spatial_dataset(dic=self.dic_training, root_dir=self.data_path, mode='train', transform = transforms.Compose([
                transforms.RandomCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
                ]))
        print('==> Training data :', len(training_set), 'frames')
        print(training_set[1][0]['img1'].size())

        train_loader = DataLoader(
            dataset=training_set, 
            batch_size=self.BATCH_SIZE,
            shuffle=True,
            num_workers=self.num_workers)
        return train_loader
Project: two-stream-action-recognition | Author: jeffreyhuang1
def validate(self):
        validation_set = spatial_dataset(dic=self.dic_testing, root_dir=self.data_path, mode='val', transform = transforms.Compose([
                transforms.Scale([224,224]),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225])
                ]))

        print('==> Validation data :', len(validation_set), 'frames')
        print(validation_set[1][1].size())

        val_loader = DataLoader(
            dataset=validation_set, 
            batch_size=self.BATCH_SIZE, 
            shuffle=False,
            num_workers=self.num_workers)
        return val_loader
Project: two-stream-action-recognition | Author: jeffreyhuang1
def train(self):
        training_set = motion_dataset(dic=self.dic_video_train, in_channel=self.in_channel, root_dir=self.data_path,
            mode='train',
            transform = transforms.Compose([
            transforms.Scale([224,224]),
            transforms.ToTensor(),
            ]))
        print('==> Training data :', len(training_set), ' videos', training_set[1][0].size())

        train_loader = DataLoader(
            dataset=training_set, 
            batch_size=self.BATCH_SIZE,
            shuffle=True,
            num_workers=self.num_workers,
            pin_memory=True
            )

        return train_loader
Project: two-stream-action-recognition | Author: jeffreyhuang1
def val(self):
        validation_set = motion_dataset(dic=self.dic_test_idx, in_channel=self.in_channel, root_dir=self.data_path,
            mode ='val',
            transform = transforms.Compose([
            transforms.Scale([224,224]),
            transforms.ToTensor(),
            ]))
        print('==> Validation data :', len(validation_set), ' frames', validation_set[1][1].size())
        # print(validation_set[1])

        val_loader = DataLoader(
            dataset=validation_set, 
            batch_size=self.BATCH_SIZE, 
            shuffle=False,
            num_workers=self.num_workers)

        return val_loader
Project: pytorch-coriander | Author: hughperkins
def test_sequential_batch(self):
        loader = DataLoader(self.dataset, batch_size=2, shuffle=False)
        batch_size = loader.batch_size
        for i, sample in enumerate(loader):
            idx = i * batch_size
            self.assertEqual(set(sample.keys()), {'a_tensor', 'another_dict'})
            self.assertEqual(set(sample['another_dict'].keys()), {'a_number'})

            t = sample['a_tensor']
            self.assertEqual(t.size(), torch.Size([batch_size, 4, 2]))
            self.assertTrue((t[0] == idx).all())
            self.assertTrue((t[1] == idx + 1).all())

            n = sample['another_dict']['a_number']
            self.assertEqual(n.size(), torch.Size([batch_size]))
            self.assertEqual(n[0], idx)
            self.assertEqual(n[1], idx + 1)
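The test above relies on DataLoader's default collate_fn, which batches dict samples key by key and recurses into nested dicts. A minimal dataset of the shape the test assumes (an editorial sketch, not the project's actual fixture):

import torch
from torch.utils.data import Dataset

class DictDataset(Dataset):
    """Each sample is a nested dict; default collation preserves the structure."""
    def __len__(self):
        return 100

    def __getitem__(self, idx):
        return {'a_tensor': torch.full((4, 2), float(idx)),
                'another_dict': {'a_number': idx}}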
Project: pytorch | Author: ezyang
def _test_batch_sampler(self, **kwargs):
        # [(0, 1), (2, 3, 4), (5, 6), (7, 8, 9), ...]
        batches = []
        for i in range(0, 100, 5):
            batches.append(tuple(range(i, i + 2)))
            batches.append(tuple(range(i + 2, i + 5)))

        dl = DataLoader(self.dataset, batch_sampler=batches, **kwargs)
        self.assertEqual(len(dl), 40)
        for i, (input, _target) in enumerate(dl):
            if i % 2 == 0:
                offset = i * 5 // 2
                self.assertEqual(len(input), 2)
                self.assertEqual(input, self.data[offset:offset + 2])
            else:
                offset = i * 5 // 2
                self.assertEqual(len(input), 3)
                self.assertEqual(input, self.data[offset:offset + 3])
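For reference, batch_sampler is mutually exclusive with batch_size, shuffle, sampler, and drop_last: every element it yields is used verbatim as one batch of indices, and len(dl) counts batches rather than samples (40 here). A minimal sketch, assuming a dataset with at least 10 items:

# Any sequence of index lists can serve as a batch_sampler.
batches = [list(range(i, min(i + 3, 10))) for i in range(0, 10, 3)]
# -> [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
loader = DataLoader(dataset, batch_sampler=batches)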
Project: sourceseparation_misc | Author: ycemsubakan
def get_loaders(loader_batchsize, **kwargs):
    arguments = kwargs['arguments']
    data = arguments.data

    if data == 'mnist':
        kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data', train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               #transforms.Normalize((0,), (1,))
                           ])),
            batch_size=loader_batchsize, shuffle=True, **kwargs)
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST('../data', train=False, transform=transforms.Compose([
                               transforms.ToTensor(),
                               #transforms.Normalize((7,), (0.3081,))
                           ])),
            batch_size=loader_batchsize, shuffle=True, **kwargs)

    return train_loader, test_loader
Project: sourceseparation_misc | Author: ycemsubakan
def form_torch_mixture_dataset(MSabs, MSphase, 
                               SPCS1abs, SPCS2abs,
                               wavfls1, wavfls2, 
                               lens1, lens2, 
                               arguments):

    MSabs = torch.from_numpy(np.array(MSabs))
    MSphase = torch.from_numpy(np.array(MSphase)) 
    SPCS1abs = torch.from_numpy(np.array(SPCS1abs)) 
    SPCS2abs = torch.from_numpy(np.array(SPCS2abs)) 
    wavfls1 = torch.from_numpy(np.array(wavfls1))
    wavfls2 = torch.from_numpy(np.array(wavfls2))

    dataset = MixtureDataset(MSabs, MSphase, SPCS1abs, SPCS2abs, 
                             wavfls1, wavfls2, lens1, lens2)

    kwargs = {'num_workers': 1, 'pin_memory': True} if arguments.cuda else {}
    loader = data_utils.DataLoader(dataset, batch_size=arguments.batch_size, shuffle=False, **kwargs)

    return loader
Project: skorch | Author: dnouri
def check_data(self, _, y):
        if y is None and self.iterator_train is DataLoader:
            raise ValueError("No y-values are given (y=None). You must "
                             "implement your own DataLoader for training "
                             "(and your validation) and supply it using the "
                             "``iterator_train`` and ``iterator_valid`` "
                             "parameters respectively.")
        elif y is None:
            # The user implements their own mechanism for generating y.
            return

        # The problem with 1-dim float y is that the pytorch DataLoader will
        # somehow upcast it to DoubleTensor
        if get_dim(y) == 1:
            raise ValueError("The target data shouldn't be 1-dimensional; "
                             "please reshape (e.g. y.reshape(-1, 1).")

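The error message already names the fix; for a 1-dimensional float target it amounts to (a sketch, assuming y is a numpy array):

y = y.reshape(-1, 1).astype('float32')  # (n,) -> (n, 1), stays float32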
Project: kaggle_amazon_from_space | Author: N01Z3
def check_aug():
    nfold = 0
    tst_dataset = CSVDataset_tst(f'../../_data/fold{nfold}/train.csv')
    tst = data.DataLoader(tst_dataset, batch_size=1, shuffle=False, num_workers=8)

    for j, val_data in enumerate(tst, 0):
        if j == 3:
            inputs, labels = val_data
            inputs, labels = inputs.numpy()[0], labels.numpy()[0]

            print(inputs.shape, labels.shape, np.amax(inputs), np.amin(inputs), np.mean(inputs))
            for i in range(13):
                plt.subplot(3, 5, 1 + i)
                plt.imshow(np.transpose(inputs[i], (1, 2, 0)))
            break
    plt.show()
Project: pytorch-semseg | Author: meetshah1995
def validate(args):

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True, img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size, num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()

    for i, (images, labels) in tqdm(enumerate(valloader)):
        model.cuda()
        images = Variable(images.cuda(), volatile=True)
        labels = Variable(labels.cuda(), volatile=True)

        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()

        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
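A note for current PyTorch: Variable(..., volatile=True) was removed in 0.4. Assuming the same names as above, the equivalent inference loop today would be:

with torch.no_grad():  # replaces volatile=True
    for images, labels in tqdm(valloader):
        outputs = model(images.cuda())
        pred = outputs.max(1)[1].cpu().numpy()
        running_metrics.update(labels.numpy(), pred)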
Project: pytorch-semseg | Author: meetshah1995
def train(args):

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True, img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    trainloader = data.DataLoader(loader, batch_size=args.batch_size, num_workers=4, shuffle=True)

    # Setup visdom for visualization
    if args.visdom:
        vis = visdom.Visdom()

        loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                               Y=torch.zeros((1,)).cpu(),
                               opts=dict(xlabel='minibatches',
                                         ylabel='Loss',
                                         title='Training Loss',
                                         legend=['Loss']))

    # Setup Model
    model = get_model(args.arch, n_classes)

    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    model.cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.l_rate, momentum=0.99, weight_decay=5e-4)

    for epoch in range(args.n_epoch):
        for i, (images, labels) in enumerate(trainloader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

            optimizer.zero_grad()
            outputs = model(images)

            loss = cross_entropy2d(outputs, labels)

            loss.backward()
            optimizer.step()

            if args.visdom:
                vis.line(
                    X=torch.ones((1, 1)).cpu() * i,
                    Y=torch.Tensor([loss.data[0]]).unsqueeze(0).cpu(),
                    win=loss_window,
                    update='append')

            if (i+1) % 20 == 0:
                print("Epoch [%d/%d] Loss: %.4f" % (epoch+1, args.n_epoch, loss.data[0]))

        torch.save(model, "{}_{}_{}_{}.pkl".format(args.arch, args.dataset, args.feature_scale, epoch))
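Likewise, indexing a 0-dim loss with loss.data[0] was removed in PyTorch 0.4; on current versions the scalar is read as:

loss_value = loss.item()  # replaces loss.data[0]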
Project: PaintsPytorch | Author: orashi
def CreateDataLoader(opt):
    random.seed(opt.manualSeed)

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize // 4, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans
                          )

    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers), drop_last=True)
Project: PaintsPytorch | Author: orashi
def CreateDataLoader(opt):
    random.seed(opt.manualSeed)

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    VTrans = transforms.Compose([
        RandomSizedCrop(224, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans
                          )

    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers), drop_last=True)
Project: PaintsPytorch | Author: orashi
def CreateDataLoader(opt):
    random.seed(opt.manualSeed)

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans
                          )

    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers), drop_last=True)
Project: PaintsPytorch | Author: orashi
def CreateDataLoader(opt):
    random.seed(opt.manualSeed)

    # folder dataset
    CTrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    VTrans = transforms.Compose([
        RandomSizedCrop(opt.imageSize // 4, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    def jitter(x):
        ran = random.uniform(0.7, 1)
        return x * ran + 1 - ran

    STrans = transforms.Compose([
        transforms.Scale(opt.imageSize, Image.BICUBIC),
        transforms.ToTensor(),
        transforms.Lambda(jitter),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    dataset = ImageFolder(rootC=opt.datarootC,
                          rootS=opt.datarootS,
                          transform=CTrans,
                          vtransform=VTrans,
                          stransform=STrans
                          )

    assert dataset

    return data.DataLoader(dataset, batch_size=opt.batchSize,
                           shuffle=True, num_workers=int(opt.workers), drop_last=True)
Project: CNN_denoise | Author: weedwind
def get_loader(chunk_list):
    data = []
    label = []
    for f in chunk_list:
        print('Loading data from %s' % f)
        with h5py.File(f, 'r') as hf:
            data.append(np.asarray(hf['data']))
            label.append(np.asarray(hf['label']))
    data = torch.FloatTensor(np.concatenate(data, axis=0))
    label = torch.FloatTensor(np.concatenate(label, axis=0))
    print('Total %d frames loaded' % data.size(0))

    dset_train = TensorDataset(data, label)
    loader_train = DataLoader(dset_train, batch_size=256, shuffle=True, num_workers=10, pin_memory=False)
    return loader_train
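One general hint, not project code: when the training loop copies batches to a GPU, pin_memory=True usually speeds up the host-to-device transfer:

loader_train = DataLoader(dset_train, batch_size=256, shuffle=True,
                          num_workers=10, pin_memory=True)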
Project: future-price-predictor | Author: htfy96
def train(model, db, args, bsz=32, eph=1, use_cuda=False):
    print("Training...")

    trainloader = data_utils.DataLoader(dataset=db, batch_size=bsz, shuffle=True)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
    best_loss = 100000

    for epoch in range(eph):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 1):
            inputs, targets = data
            inputs = inputs.unsqueeze(1)
            targets = target_onehot_to_classnum_tensor(targets)
            if use_cuda and cuda_ava:
                inputs = Variable(inputs.float().cuda())
                targets = Variable(targets.cuda())
            else:
                inputs = Variable(inputs.float())
                targets = Variable(targets)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]
            last_loss = loss.data[0]
            if i % 100 == 0:
                print("[%d, %5d] loss: %.3f" % (epoch + 1, i, running_loss / 100))
                running_loss = 0

            if last_loss < best_loss:
                best_loss = last_loss
                acc = evaluate(model, trainloader, use_cuda)
                torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_epoch_{}_iter_{}_loss_{}_acc_{}_{}.t7'.format(epoch + 1, i, last_loss, acc, datetime.datetime.now().strftime("%b_%d_%H:%M:%S"))))
    acc = evaluate(model, trainloader, use_cuda)
    torch.save(model.state_dict(), os.path.join('saved_model', 'cnnT1_all_acc_{}.t7'.format(acc)))

    print("Finished Training!")
Project: pytorch-dist | Author: apaszke
def test_sequential(self):
        self._test_sequential(DataLoader(self.dataset))
Project: pytorch-dist | Author: apaszke
def test_sequential_batch(self):
        self._test_sequential(DataLoader(self.dataset, batch_size=2))
Project: pytorch-dist | Author: apaszke
def test_shuffle(self):
        self._test_shuffle(DataLoader(self.dataset, shuffle=True))
Project: pytorch-dist | Author: apaszke
def test_shuffle_batch(self):
        self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True))
Project: pytorch-dist | Author: apaszke
def test_sequential_workers(self):
        self._test_sequential(DataLoader(self.dataset, num_workers=4))
Project: pytorch-dist | Author: apaszke
def test_shuffle_workers(self):
        self._test_shuffle(DataLoader(self.dataset, shuffle=True, num_workers=4))
Project: pytorch-dist | Author: apaszke
def test_shuffle_batch_workers(self):
        self._test_shuffle(DataLoader(self.dataset, batch_size=2, shuffle=True, num_workers=4))
Project: pytorch-dist | Author: apaszke
def test_error(self):
        self._test_error(DataLoader(ErrorDataset(100), batch_size=2, shuffle=True))
Project: pytorch-dist | Author: apaszke
def test_error_workers(self):
        self._test_error(DataLoader(ErrorDataset(41), batch_size=2, shuffle=True, num_workers=4))
Project: pytorch-dist | Author: apaszke
def test_partial_workers(self):
        "check that workers exit even if the iterator is not exhausted"
        loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4))
        workers = loader.workers
        for i, sample in enumerate(loader):
            if i == 3:
                break
        del loader
        for w in workers:
            w.join(1.0)  # timeout of one second
            self.assertFalse(w.is_alive(), 'subprocess not terminated')
            self.assertEqual(w.exitcode, 0)
Project: tnt | Author: pytorch
def parallel(self, *args, **kwargs):
        return DataLoader(self, *args, **kwargs)
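Since parallel() simply forwards its arguments to DataLoader, a tnt dataset exposing it can be batched in one call (hypothetical usage; some_dataset stands for any such dataset):

loader = some_dataset.parallel(batch_size=32, shuffle=True, num_workers=2)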
Project: kaggle-planet | Author: ZijunDeng
def main():
    training_batch_size = 352
    validation_batch_size = 352

    net = get_res152(num_classes=num_classes, snapshot_path=os.path.join(
        ckpt_path, 'epoch_15_validation_loss_0.0772_iter_1000.pth')).cuda()
    net.eval()

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.311, 0.340, 0.299], [0.167, 0.144, 0.138])
    ])
    criterion = nn.MultiLabelSoftMarginLoss().cuda()

    train_set = MultipleClassImageFolder(split_train_dir, transform)
    train_loader = DataLoader(train_set, batch_size=training_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, train_loader)
    loss = criterion(batch_outputs, batch_labels)
    print('training loss %.4f' % loss.cpu().data.numpy()[0])
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    thretholds = find_best_threthold(batch_outputs, batch_labels)

    val_set = MultipleClassImageFolder(split_val_dir, transform)
    val_loader = DataLoader(val_set, batch_size=validation_batch_size, num_workers=16)
    batch_outputs, batch_labels = predict(net, val_loader)
    loss = criterion(batch_outputs, batch_labels)
    print('validation loss %.4f' % loss.cpu().data.numpy()[0])
    batch_outputs = batch_outputs.cpu().data.numpy()
    batch_labels = batch_labels.cpu().data.numpy()
    sio.savemat('./val_output.mat', {'outputs': batch_outputs, 'labels': batch_labels})
    prediction = get_one_hot_prediction(batch_outputs, thretholds)
    evaluation = evaluate(prediction, batch_labels)
    print('validation evaluation: accuracy %.4f, precision %.4f, recall %.4f, f2 %.4f' % (
        evaluation[0], evaluation[1], evaluation[2], evaluation[3]))
Project: nnmnkwii | Author: r9y9
def test_sequence_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    X, Y = _get_small_datasets(padded=False)

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == len(y.shape)
            assert len(x.shape) == 3
            print(idx, x.shape, y.shape)

    # Test with batch_size = 1
    yield __test, X, Y, 1
    # Since we have variable-length frames, a batch size larger than 1 causes
    # a runtime error.
    yield raises(RuntimeError)(__test), X, Y, 2

    # For a padded dataset, which can be represented as (N, T^max, D), the
    # batch size can be any number.
    X, Y = _get_small_datasets(padded=True)
    yield __test, X, Y, 1
    yield __test, X, Y, 2
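The RuntimeError expected for batch_size=2 comes from default_collate trying to stack tensors of unequal length; a custom collate_fn that zero-pads each sequence to the batch maximum avoids it (an editorial sketch, not part of nnmnkwii):

import torch

def pad_collate(batch):
    # batch is a list of (x, y) pairs with shapes (T_i, D_x) and (T_i, D_y).
    max_len = max(x.size(0) for x, _ in batch)

    def pad(t):
        out = t.new_zeros(max_len, t.size(1))
        out[:t.size(0)] = t
        return out

    xs = torch.stack([pad(x) for x, _ in batch])
    ys = torch.stack([pad(y) for _, y in batch])
    return xs, ys

# loader = data_utils.DataLoader(dataset, batch_size=2, collate_fn=pad_collate)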
Project: nnmnkwii | Author: r9y9
def test_frame_wise_torch_data_loader():
    import torch
    from torch.utils import data as data_utils

    X, Y = _get_small_datasets(padded=False)

    # Since torch's Dataset (and Chainer's, and maybe others) assumes the
    # dataset has a fixed length, i.e., implements the `__len__` method, we
    # need to know the number of frames for each utterance.
    # The sum of the frame counts is the dataset size for frame-wise iteration.
    lengths = np.array([len(x) for x in X], dtype=int)

    # For the above reason, we need to explicitly give the number of frames.
    X = MemoryCacheFramewiseDataset(X, lengths, cache_size=len(X))
    Y = MemoryCacheFramewiseDataset(Y, lengths, cache_size=len(Y))

    class TorchDataset(data_utils.Dataset):
        def __init__(self, X, Y):
            self.X = X
            self.Y = Y

        def __getitem__(self, idx):
            return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

        def __len__(self):
            return len(self.X)

    def __test(X, Y, batch_size):
        dataset = TorchDataset(X, Y)
        loader = data_utils.DataLoader(
            dataset, batch_size=batch_size, num_workers=1, shuffle=True)
        for idx, (x, y) in enumerate(loader):
            assert len(x.shape) == 2
            assert len(y.shape) == 2

    yield __test, X, Y, 128
    yield __test, X, Y, 256
Project: KagglePlanetPytorch | Author: Mctigger
def __init__(self, trainer, dataset, start_epoch=0, momentum=0, batch_size=96):
        super(SemiSupervisedUpdater, self).__init__()

        self.trainer = trainer
        self.dataset = dataset
        self.start_epoch = start_epoch
        self.loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=10, pin_memory=True)
        self.momentum = momentum
Project: KagglePlanetPytorch | Author: Mctigger
def get_test_loader(test_images, transformations):
    dset_test = KaggleAmazonTestDataset(test_images, paths.test_jpg, '.jpg', transformations, divide=False)
    loader_val = DataLoader(dset_test,
                            batch_size=batch_size,
                            num_workers=12,
                            pin_memory=True)
    return loader_val
Project: ParlAI | Author: facebookresearch
def __init__(self, opt, shared=None):
        opt['batch_sort'] = False
        super().__init__(opt, shared)
        self.use_batch_act = self.bsz > 1
        self.num_workers = opt['numworkers']
        # One can specify a collate function to use for preparing a batch
        collate_fn = opt.get('collate_fn', default_collate)
        if not shared:
            self.dataset = StreamDataset(opt)
            self.pytorch_dataloader = DataLoader(
                self.dataset,
                batch_size=self.bsz,
                shuffle=False,
                sampler=sampler.SequentialSampler(self.dataset),
                num_workers=self.num_workers,
                collate_fn=collate_fn,
                pin_memory=False,
                drop_last=False,
                )
            self.lastYs = [None] * self.bsz
        else:
            self.dataset = shared['dataset']
            self.pytorch_dataloader = shared['pytorch_dataloader']
            self.lastYs = shared['lastYs']

        self.num_batches = math.ceil(self.dataset.num_examples()/self.bsz)
        self.reset()
Project: pyro | Author: uber
def setup_data_loaders(dataset, use_cuda, batch_size, sup_num=None, root='./data', download=True, **kwargs):
    """
    helper function for setting up pytorch data loaders for a semi-supervised dataset
    :param dataset: the data to use
    :param use_cuda: use GPU(s) for training
    :param batch_size: size of a batch of data to output when iterating over the data loaders
    :param sup_num: number of supervised data examples
    :param root: where on the filesystem should the dataset be
    :param download: download the dataset (if it doesn't exist already)
    :param kwargs: other params for the pytorch data loader
    :return: three data loaders: (supervised data for training, unsupervised data for training,
                                  supervised data for testing)
    """
    # instantiate the dataset as training/testing sets
    if 'num_workers' not in kwargs:
        kwargs = {'num_workers': 0, 'pin_memory': False}

    cached_data = {}
    loaders = {}
    for mode in ["unsup", "test", "sup", "valid"]:
        if sup_num is None and mode == "sup":
            # in this special case, we do not want "sup" and "valid" data loaders
            return loaders["unsup"], loaders["test"]
        cached_data[mode] = dataset(root=root, mode=mode, download=download,
                                    sup_num=sup_num, use_cuda=use_cuda)
        loaders[mode] = DataLoader(cached_data[mode], batch_size=batch_size, shuffle=True, **kwargs)

    return loaders
Project: inferno | Author: inferno-pytorch
def train_loader(self, value):
        assert isinstance(value, DataLoader)
        self._loaders.update({'train': value})
Project: inferno | Author: inferno-pytorch
def validate_loader(self, value):
        assert isinstance(value, DataLoader)
        self._loaders.update({'validate': value})
Project: pytorch-tutorial | Author: yunjey
def get_loader(image_path, image_size, batch_size, num_workers=2):
    """Builds and returns Dataloader."""

    transform = transforms.Compose([
                    transforms.Scale(image_size),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    dataset = ImageFolder(image_path, transform)
    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=num_workers)
    return data_loader
Project: LSGAN.pytorch | Author: meliketoy
def get_loader(image_path, image_size, batch_size, transform, num_workers=2):
    dataset = ImageFolder(image_path, transform)
    data_loader = data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers
    )

    return data_loader