百科问答小站 logo
百科问答小站 font logo



使用pytorch时,训练集数据太多达到上千万张,Dataloader加载很慢怎么办? 第1页

  

user avatar   fang-niu-wa-28-17 网友的相关建议: 
      

下面是我见到过的写得最优雅的,预加载的dataloader迭代方式可以参考下:

使用方法就和普通dataloder一样 for xxx in trainloader .

主要思想就两点 , 第一重载 _iter 和 next_ ,第二点多线程异步Queue加载

       import numbers import os import queue as Queue import threading  import mxnet as mx import numpy as np import torch from torch.utils.data import DataLoader, Dataset from torchvision import transforms   class BackgroundGenerator(threading.Thread):     def __init__(self, generator, local_rank, max_prefetch=6):         super(BackgroundGenerator, self).__init__()         self.queue = Queue.Queue(max_prefetch)         self.generator = generator         self.local_rank = local_rank         self.daemon = True         self.start()      def run(self):         torch.cuda.set_device(self.local_rank)         for item in self.generator:             self.queue.put(item)         self.queue.put(None)      def next(self):         next_item = self.queue.get()         if next_item is None:             raise StopIteration         return next_item      def __next__(self):         return self.next()      def __iter__(self):         return self   class DataLoaderX(DataLoader):     def __init__(self, local_rank, **kwargs):         super(DataLoaderX, self).__init__(**kwargs)         self.stream = torch.cuda.Stream(local_rank)         self.local_rank = local_rank      def __iter__(self):         self.iter = super(DataLoaderX, self).__iter__()         self.iter = BackgroundGenerator(self.iter, self.local_rank)         self.preload()         return self      def preload(self):         self.batch = next(self.iter, None)         if self.batch is None:             return None         with torch.cuda.stream(self.stream):             for k in range(len(self.batch)):                 self.batch[k] = self.batch[k].to(device=self.local_rank,                                                  non_blocking=True)      def __next__(self):         torch.cuda.current_stream().wait_stream(self.stream)         batch = self.batch         if batch is None:             raise StopIteration         self.preload()         return batch   class MXFaceDataset(Dataset):     def __init__(self, root_dir, local_rank):         super(MXFaceDataset, self).__init__()         self.transform = transforms.Compose(             [transforms.ToPILImage(),              transforms.RandomHorizontalFlip(),              transforms.ToTensor(),              transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),              ])         self.root_dir = root_dir         self.local_rank = local_rank         path_imgrec = os.path.join(root_dir, 'train.rec')         path_imgidx = os.path.join(root_dir, 'train.idx')         self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, 'r')         s = self.imgrec.read_idx(0)         header, _ = mx.recordio.unpack(s)         if header.flag > 0:             self.header0 = (int(header.label[0]), int(header.label[1]))             self.imgidx = np.array(range(1, int(header.label[0])))         else:             self.imgidx = np.array(list(self.imgrec.keys))      def __getitem__(self, index):         idx = self.imgidx[index]         s = self.imgrec.read_idx(idx)         header, img = mx.recordio.unpack(s)         label = header.label         if not isinstance(label, numbers.Number):             label = label[0]         label = torch.tensor(label, dtype=torch.long)         sample = mx.image.imdecode(img).asnumpy()         if self.transform is not None:             sample = self.transform(sample)         return sample, label      def __len__(self):         return len(self.imgidx)     




  

相关话题

  你有哪些deep learning(rnn、cnn)调参的经验? 
  如何评价 On Unifying Deep Generative Models 这篇 paper? 
  百度在深度学习上使用Xilinx FPGA? 
  如何看待李沐老师提出的「用随机梯度下降来优化人生」? 
  从今年校招来看,机器学习等算法岗位应届生超多,竞争激烈,未来 3-5 年机器学习相关就业会达到饱和吗? 
  如何理解链接预测(link prediction)? 
  Partial Multi-Label Learning是什么?它的发展史又是怎样的?最新的进展如何? 
  深度学习中,模型大了好还是小了好呢? 
  如何评价Hinton组的新工作SimCLR? 
  神经网络中 warmup 策略为什么有效;有什么理论解释么? 

前一个讨论
如何看待上海市科委、中科院上海有机所和观视频联合制作的科普微电影《无处不在的手性之有机师姐》?
下一个讨论
表哥说机械比计算机经管都好,如何看待他的言论?





© 2025-02-27 - tinynew.org. All Rights Reserved.
© 2025-02-27 - tinynew.org. 保留所有权利