
Commit 2d8649c

Saumitro Dasgupta committed

Rewrote the image loader+processor
The previous one was (insanely) sub-optimal

1 parent 13c7e9a   commit 2d8649c

4 files changed: +180 -71 lines changed


examples/imagenet/classify.py

Lines changed: 14 additions & 2 deletions
@@ -38,18 +38,30 @@ def classify(model_data_path, image_paths):
     # Construct the network
     net = models.GoogleNet({'data': input_node})
 
+    # Create an image producer (loads and processes images in parallel)
+    image_producer = dataset.ImageProducer(image_paths=image_paths, data_spec=spec)
+
     with tf.Session() as sesh:
+        # Start the image processing workers
+        coordinator = tf.train.Coordinator()
+        threads = image_producer.start(session=sesh, coordinator=coordinator)
+
         # Load the converted parameters
         print('Loading the model')
         net.load(model_data_path, sesh)
+
         # Load the input image
         print('Loading the images')
-        input_images = dataset.load_images(image_paths, spec).eval()
+        indices, input_images = image_producer.get(sesh)
+
         # Perform a forward pass through the network to get the class probabilities
         print('Classifying')
         probs = sesh.run(net.get_output(), feed_dict={input_node: input_images})
-        display_results(image_paths, probs)
+        display_results([image_paths[i] for i in indices], probs)
 
+        # Stop the worker threads
+        coordinator.request_stop()
+        coordinator.join(threads, stop_grace_period_secs=2)
 
 def main():
     # Parse arguments
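Note: the start/consume/stop sequence above follows TensorFlow's standard (pre-tf.data) Coordinator and QueueRunner lifecycle. A minimal, self-contained sketch of that pattern, not part of this commit (the toy queue and the constant it enqueues are illustrative only):

    import tensorflow as tf

    # A toy FIFO queue fed by two background threads, mirroring how
    # classify.py drives ImageProducer: start workers, consume, then stop.
    queue = tf.FIFOQueue(capacity=10, dtypes=[tf.int32])
    enqueue_op = queue.enqueue([tf.constant(1)])
    runner = tf.train.QueueRunner(queue, [enqueue_op] * 2)

    with tf.Session() as sesh:
        coordinator = tf.train.Coordinator()
        threads = runner.create_threads(sesh, coord=coordinator, start=True)
        print(sesh.run(queue.dequeue()))  # consume one element
        coordinator.request_stop()        # ask the worker threads to exit
        coordinator.join(threads, stop_grace_period_secs=2)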

examples/imagenet/dataset.py

Lines changed: 134 additions & 51 deletions
@@ -5,36 +5,14 @@
 import tensorflow as tf
 
 
-def read_image(path, to_bgr=True):
-    '''Returns the image at the given path as a tensor.'''
-    # Read the file
-    file_data = tf.read_file(path)
-    # Figure out the image format from the extension
-    ext = osp.splitext(path)[-1].lower()
-    if ext == '.png':
-        decoder = tf.image.decode_png
-    elif ext in ('.jpg', '.jpeg'):
-        decoder = tf.image.decode_jpeg
-    else:
-        raise ValueError('Unsupported image extension: {}'.format(ext))
-    img = decoder(file_data, channels=3)
-    if to_bgr:
-        # Convert from RGB channel ordering to BGR
-        # This matches, for instance, how OpenCV orders the channels.
-        img = tf.reverse(img, [False, False, True])
-    return img
-
-
-def _load_image(path, scale, isotropic, crop, mean):
-    '''Loads and pre-processes the image at the given path.
+def process_image(img, scale, isotropic, crop, mean):
+    '''Crops, scales, and normalizes the given image.
     scale : The image wil be first scaled to this size.
             If isotropic is true, the smaller side is rescaled to this,
             preserving the aspect ratio.
     crop : After scaling, a central crop of this size is taken.
     mean : Subtracted from the image
     '''
-    # Read in the image
-    img = read_image(path)
     # Rescale
     if isotropic:
         img_shape = tf.to_float(tf.shape(img)[:2])
@@ -52,22 +30,136 @@ def _load_image(path, scale, isotropic, crop, mean):
     return tf.to_float(img) - mean
 
 
-def load_image(path, spec):
-    '''Load a single image, processed based on the given spec.'''
-    return _load_image(path=path,
-                       scale=spec.scale_size,
-                       isotropic=spec.isotropic,
-                       crop=spec.crop_size,
-                       mean=spec.mean)
+class ImageProducer(object):
+    '''
+    Loads and processes batches of images in parallel.
+    '''
+
+    def __init__(self, image_paths, data_spec, num_concurrent=4, batch_size=None, labels=None):
+        # The data specifications describe how to process the image
+        self.data_spec = data_spec
+        # A list of full image paths
+        self.image_paths = image_paths
+        # An optional list of labels corresponding to each image path
+        self.labels = labels
+        # A boolean flag per image indicating whether its a JPEG or PNG
+        self.extension_mask = self.create_extension_mask(self.image_paths)
+        # Create the loading and processing operations
+        self.setup(batch_size=batch_size, num_concurrent=num_concurrent)
+
+    def setup(self, batch_size, num_concurrent):
+        # Validate the batch size
+        num_images = len(self.image_paths)
+        batch_size = min(num_images, batch_size or self.data_spec.batch_size)
+        if num_images % batch_size != 0:
+            raise ValueError(
+                'The total number of images ({}) must be divisible by the batch size ({}).'.format(
+                    num_images, batch_size))
+        self.num_batches = num_images / batch_size
+
+        # Create a queue that will contain image paths (and their indices and extension indicator)
+        self.path_queue = tf.FIFOQueue(capacity=num_images,
+                                       dtypes=[tf.int32, tf.bool, tf.string],
+                                       name='path_queue')
+
+        # Enqueue all image paths, along with their indices
+        indices = tf.range(num_images)
+        self.enqueue_paths_op = self.path_queue.enqueue_many([indices, self.extension_mask,
+                                                              self.image_paths])
+        # Close the path queue (no more additions)
+        self.close_path_queue_op = self.path_queue.close()
+
+        # Create an operation that dequeues a single path and returns a processed image
+        (idx, processed_image) = self.process()
+
+        # Create a queue that will contain the processed images (and their indices)
+        image_shape = (self.data_spec.crop_size, self.data_spec.crop_size, self.data_spec.channels)
+        processed_queue = tf.FIFOQueue(capacity=int(np.ceil(num_images / float(num_concurrent))),
+                                       dtypes=[tf.int32, tf.float32],
+                                       shapes=[(), image_shape],
+                                       name='processed_queue')
+
+        # Enqueue the processed image and path
+        enqueue_processed_op = processed_queue.enqueue([idx, processed_image])
+
+        # Create a dequeue op that fetches a batch of processed images off the queue
+        self.dequeue_op = processed_queue.dequeue_many(batch_size)
 
+        # Create a queue runner to perform the processing operations in parallel
+        num_concurrent = min(num_concurrent, num_images)
+        self.queue_runner = tf.train.QueueRunner(processed_queue,
+                                                 [enqueue_processed_op] * num_concurrent)
 
-def load_images(paths, spec):
-    '''Load multiple images, processed based on the given spec.'''
-    return tf.pack([load_image(path, spec) for path in paths])
+    def start(self, session, coordinator, num_concurrent=4):
+        '''Start the processing worker threads.'''
+        # Queue all paths
+        session.run(self.enqueue_paths_op)
+        # Close the path queue
+        session.run(self.close_path_queue_op)
+        # Start the queue runner and return the created threads
+        return self.queue_runner.create_threads(session, coord=coordinator, start=True)
 
+    def get(self, session):
+        '''
+        Get a single batch of images along with their indices. If a set of labels were provided,
+        the corresponding labels are returned instead of the indices.
+        '''
+        (indices, images) = session.run(self.dequeue_op)
+        if self.labels is not None:
+            labels = [self.labels[idx] for idx in indices]
+            return (labels, images)
+        return (indices, images)
 
-class ImageNet(object):
-    '''Iterates over the ImageNet validation set.'''
+    def batches(self, session):
+        '''Yield a batch until no more images are left.'''
+        for _ in xrange(self.num_batches):
+            yield self.get(session=session)
+
+    def load_image(self, image_path, is_jpeg):
+        # Read the file
+        file_data = tf.read_file(image_path)
+        # Decode the image data
+        img = tf.cond(is_jpeg,
+                      lambda: tf.image.decode_jpeg(file_data, channels=3),
+                      lambda: tf.image.decode_png(file_data, channels=3))
+        if self.data_spec.expects_bgr:
+            # Convert from RGB channel ordering to BGR
+            # This matches, for instance, how OpenCV orders the channels.
+            img = tf.reverse(img, [False, False, True])
+        return img
+
+    def process(self):
+        # Dequeue a single image path
+        idx, is_jpeg, image_path = self.path_queue.dequeue()
+        # Load the image
+        img = self.load_image(image_path, is_jpeg)
+        # Process the image
+        processed_img = process_image(img=img,
+                                      scale=self.data_spec.scale_size,
+                                      isotropic=self.data_spec.isotropic,
+                                      crop=self.data_spec.crop_size,
+                                      mean=self.data_spec.mean)
+        # Return the processed image, along with its index
+        return (idx, processed_img)
+
+    @staticmethod
+    def create_extension_mask(paths):
+
+        def is_jpeg(path):
+            extension = osp.splitext(path)[-1].lower()
+            if extension in ('.jpg', '.jpeg'):
+                return True
+            if not extension == '.png':
+                raise ValueError('Unsupported image format: {}'.format(extension))
+            return False
+
+        return [is_jpeg(p) for p in paths]
+
+    def __len__(self):
+        return len(self.image_paths)
+
+
+class ImageNetProducer(ImageProducer):
 
     def __init__(self, val_path, data_path, data_spec):
         # Read in the ground truth labels for the validation set
@@ -76,19 +168,10 @@ def __init__(self, val_path, data_path, data_spec):
         gt_pairs = [line.split() for line in gt_lines]
         # Get the full image paths
         # You will need a copy of the ImageNet validation set for this.
-        self.image_paths = [osp.join(data_path, p[0]) for p in gt_pairs]
+        image_paths = [osp.join(data_path, p[0]) for p in gt_pairs]
         # The corresponding ground truth labels
-        self.labels = np.array([int(p[1]) for p in gt_pairs])
-        # The data specifications for the model being validated (for preprocessing)
-        self.data_spec = data_spec
-
-    def batches(self, n):
-        '''Yields a batch of up to n preprocessed image tensors and their ground truth labels.'''
-        for i in xrange(0, len(self.image_paths), n):
-            images = load_images(self.image_paths[i:i + n], self.data_spec)
-            labels = self.labels[i:i + n]
-            yield (images, labels)
-
-    def __len__(self):
-        '''Returns the number of instances in the validation set.'''
-        return len(self.labels)
+        labels = np.array([int(p[1]) for p in gt_pairs])
+        # Initialize base
+        super(ImageNetProducer, self).__init__(image_paths=image_paths,
+                                               data_spec=data_spec,
+                                               labels=labels)
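For reference, the new ImageProducer is driven the same way in classify.py and validate.py. A hedged sketch of standalone usage, assuming `paths` is a list of JPEG/PNG file paths, `spec` is a DataSpec from models.get_data_spec, and the script runs from examples/imagenet so the import resolves:

    import tensorflow as tf
    from dataset import ImageProducer  # assumed import path

    producer = ImageProducer(image_paths=paths, data_spec=spec)

    with tf.Session() as sesh:
        coordinator = tf.train.Coordinator()
        threads = producer.start(session=sesh, coordinator=coordinator)
        # Each batch is (indices, images), or (labels, images) when labels were passed in
        for indices, images in producer.batches(sesh):
            print(len(indices), images.shape)
        coordinator.request_stop()
        coordinator.join(threads, stop_grace_period_secs=2)

Because batch_size defaults to min(len(paths), spec.batch_size) and must divide the total image count evenly, small path lists fall back to a single batch.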

examples/imagenet/models/helper.py

Lines changed: 13 additions & 9 deletions
@@ -12,10 +12,18 @@
 from nin import NiN
 from resnet import ResNet50, ResNet101, ResNet152
 
+
 class DataSpec(object):
     '''Input data specifications for an ImageNet model.'''
 
-    def __init__(self, batch_size, scale_size, crop_size, isotropic, channels=3, mean=None):
+    def __init__(self,
+                 batch_size,
+                 scale_size,
+                 crop_size,
+                 isotropic,
+                 channels=3,
+                 mean=None,
+                 bgr=True):
         # The recommended batch size for this model
         self.batch_size = batch_size
         # The image should be scaled to this size first during preprocessing
@@ -31,11 +39,15 @@ def __init__(self, batch_size, scale_size, crop_size, isotropic, channels=3, mea
         # Some of the earlier models (like AlexNet) used a spatial three-channeled mean.
         # However, using just the per-channel mean values instead doesn't affect things too much.
         self.mean = mean if mean is not None else np.array([104., 117., 124.])
+        # Whether this model expects images to be in BGR order
+        self.expects_bgr = True
+
 
 def alexnet_spec(batch_size=500):
     '''Parameters used by AlexNet and its variants.'''
     return DataSpec(batch_size=batch_size, scale_size=256, crop_size=227, isotropic=False)
 
+
 def std_spec(batch_size, isotropic=True):
     '''Parameters commonly used by "post-AlexNet" architectures.'''
     return DataSpec(batch_size=batch_size, scale_size=256, crop_size=224, isotropic=isotropic)
@@ -47,21 +59,13 @@ def std_spec(batch_size, isotropic=True):
 # These specifications are based on how the models were trained.
 # The recommended batch size is based on a Titan X (12GB).
 MODEL_DATA_SPECS = {
-
     AlexNet: alexnet_spec(),
-
     CaffeNet: alexnet_spec(),
-
     GoogleNet: std_spec(batch_size=200, isotropic=False),
-
     ResNet50: std_spec(batch_size=25),
-
     ResNet101: std_spec(batch_size=25),
-
     ResNet152: std_spec(batch_size=25),
-
     NiN: std_spec(batch_size=500),
-
     VGG16: std_spec(batch_size=224)
 }
 
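A small illustrative sketch of how these specs are consumed, assuming the names below are imported from examples/imagenet/models/helper.py (not part of this commit):

    # Build a spec directly with the new keyword-per-line signature,
    # or look one up from the table keyed by model class.
    custom_spec = DataSpec(batch_size=200,
                           scale_size=256,
                           crop_size=224,
                           isotropic=False,
                           bgr=True)
    googlenet_spec = MODEL_DATA_SPECS[GoogleNet]
    print(googlenet_spec.crop_size)   # 224
    print(googlenet_spec.mean)        # defaults to [104. 117. 124.]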

examples/imagenet/validate.py

Lines changed: 19 additions & 9 deletions
@@ -34,14 +34,14 @@ def load_model(name):
     return NetClass({'data': data_node})
 
 
-def validate(net, model_path, images, top_k=5):
+def validate(net, model_path, image_producer, top_k=5):
     '''Compute the top_k classification accuracy for the given network and images.'''
     # Get the data specifications for given network
     spec = models.get_data_spec(model_instance=net)
     # Get the input node for feeding in the images
     input_node = net.inputs['data']
     # Create a placeholder for the ground truth labels
-    label_node = tf.placeholder(tf.int32, shape=(spec.batch_size,))
+    label_node = tf.placeholder(tf.int32)
     # Get the output of the network (class probabilities)
     probs = net.get_output()
     # Create a top_k accuracy node
@@ -51,21 +51,29 @@ def validate(net, model_path, image_producer, top_k=5):
     # The number of correctly classified images
     correct = 0
     # The total number of images
-    total = len(images)
+    total = len(image_producer)
+
     with tf.Session() as sesh:
+        coordinator = tf.train.Coordinator()
         # Load the converted parameters
-        net.load(model_path, sesh)
+        net.load(data_path=model_path, session=sesh)
+        # Start the image processing workers
+        threads = image_producer.start(session=sesh, coordinator=coordinator)
         # Iterate over and classify mini-batches
-        for idx, (images, labels) in enumerate(images.batches(spec.batch_size)):
+        for (labels, images) in image_producer.batches(sesh):
             correct += np.sum(sesh.run(top_k_op,
-                                       feed_dict={input_node: images.eval(),
+                                       feed_dict={input_node: images,
                                                   label_node: labels}))
-            count += images.get_shape()[0].value
+            count += len(labels)
             cur_accuracy = float(correct) * 100 / count
             print('{:>6}/{:<6} {:>6.2f}%'.format(count, total, cur_accuracy))
+        # Stop the worker threads
+        coordinator.request_stop()
+        coordinator.join(threads, stop_grace_period_secs=2)
     print('Top {} Accuracy: {}'.format(top_k, float(correct) / total))
 
 
+
 def main():
     # Parse arguments
     parser = argparse.ArgumentParser()
@@ -82,10 +90,12 @@ def main():
 
     # Load the dataset
     data_spec = models.get_data_spec(model_instance=net)
-    images = dataset.ImageNet(args.val_gt, args.imagenet_data_dir, data_spec)
+    image_producer = dataset.ImageNetProducer(val_path=args.val_gt,
+                                              data_path=args.imagenet_data_dir,
+                                              data_spec=data_spec)
 
     # Evaluate its performance on the ILSVRC12 validation set
-    validate(net, args.model_path, images)
+    validate(net, args.model_path, image_producer)
 
 
 if __name__ == '__main__':
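The top_k_op used in the second hunk is created in unchanged context just after the "# Create a top_k accuracy node" comment and is not shown in this diff. A hedged sketch of how such a node is commonly built, using names defined earlier in validate():

    # Illustrative only: probs holds the network's class probabilities and
    # label_node is the (now shape-less) int32 placeholder defined above.
    top_k_op = tf.nn.in_top_k(probs, label_node, top_k)
    # Running it yields one boolean per image; np.sum then counts the correct ones.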
