# Source code for jaclearn.datasets.image_classification.svhn

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : svhn.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 01/19/2018
#
# This file is part of Jacinle.
# Distributed under terms of the MIT license.

import os

import numpy as np

from jacinle.io.network import download

__all__ = ['load_svhn']


# Download table for the SVHN 32x32 .mat files.
# Each value is a three-item list: [source URL, local file name, MD5 checksum].
# Keys are listed in the order train, test, extra.
svhn_web_address = {
    'train': [
        "http://ufldl.stanford.edu/housenumbers/train_32x32.mat",
        "train_32x32.mat",
        "e26dedcc434d2e4c54c9b2d4a06d8373",
    ],
    'test': [
        "http://ufldl.stanford.edu/housenumbers/test_32x32.mat",
        "test_32x32.mat",
        "eb5a983be6a315427106f1b164d9cef3",
    ],
    'extra': [
        "http://ufldl.stanford.edu/housenumbers/extra_32x32.mat",
        "extra_32x32.mat",
        "a93ce644f1a588dc4d68dda5feec44a7",
    ],
}


def load_svhn(data_dir, extra=False):
    """Load the SVHN 32x32 ``.mat`` subsets, downloading any missing file.

    For every requested subset the matching file is looked up in ``data_dir``;
    when it is not present it is fetched with ``download`` (the expected MD5
    from ``svhn_web_address`` is passed along) before being read with
    ``scipy.io.loadmat``.

    Args:
        data_dir: directory that holds, or will receive, the ``.mat`` files.
        extra: when truthy, the ``extra`` subset is loaded in addition to
            ``train`` and ``test``.

    Returns:
        A tuple with one ``(images, labels)`` pair per subset, ordered
        ``train``, ``test`` and, if requested, ``extra``. ``images`` is the
        file's ``X`` array with its last (4th) axis moved to the front, stored
        C-contiguously; ``labels`` is the file's ``y`` array, returned as
        loaded.
    """
    # Kept as a function-scope import so importing this module does not
    # require scipy.
    from scipy.io import loadmat

    # Name the subsets explicitly instead of slicing the dict's key list, so
    # the result does not depend on the ordering of ``svhn_web_address``.
    subsets = ['train', 'test']
    if extra:
        subsets.append('extra')

    all_sets = []
    for subset in subsets:
        data_addr, data_file, data_hash = svhn_web_address[subset]
        dataset = os.path.join(data_dir, data_file)
        if not os.path.isfile(dataset):
            download(data_addr, data_dir, data_file, md5=data_hash)

        mat = loadmat(dataset)
        # Move the 4th axis to the front; presumably this turns an
        # (H, W, C, N) layout into (N, H, W, C) -- TODO confirm against the
        # .mat files.
        images = np.ascontiguousarray(np.transpose(mat['X'], [3, 0, 1, 2]))
        all_sets.append((images, mat['y']))

    return tuple(all_sets)