"""Created on Jun 2, 2017.

@author: bob

Downloads and extracts the notMNIST dataset used for the image-recognition
assignment (assignment 2). Running this module fetches the large/small
archives into ``data_root`` and unpacks them into one folder per letter class.
"""
from __future__ import print_function

import os
import pickle
import sys
import tarfile
from urllib.request import urlretrieve

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display, Image
from scipy import ndimage
from sklearn.linear_model import LogisticRegression

# Base URL the notMNIST archives are served from.
url = 'https://commondatastorage.googleapis.com/books1000/'
# Last whole-percent value printed by the progress hook (None = nothing printed yet).
last_percent_reported = None
# Local directory where archives are stored and extracted.
data_root = '/Users/bob/Documents/TensorFlow/Data/'  # Change me to store data elsewhere


def download_progress_hook(count, blockSize, totalSize):
    """A hook to report the progress of a download.

    This is mostly intended for users with slow internet connections.
    Prints the percentage at every 5% milestone and a dot for every other
    whole-percent change, so a slow download still shows activity.

    Args:
        count: number of blocks transferred so far.
        blockSize: size of each block in bytes.
        totalSize: total size of the download in bytes (may be 0 if unknown).
    """
    global last_percent_reported
    # Guard against totalSize == 0 (server did not report a length) to
    # avoid a ZeroDivisionError aborting the download callback.
    if totalSize > 0:
        percent = int(count * blockSize * 100 / totalSize)
    else:
        percent = 0

    if last_percent_reported != percent:
        if percent % 5 == 0:
            sys.stdout.write("%s%%" % percent)
        else:
            sys.stdout.write(".")
        sys.stdout.flush()
        last_percent_reported = percent


def maybe_download(filename, expected_bytes, force=False):
    """Download a file if not present, and make sure it's the right size.

    Args:
        filename: archive name, resolved relative to ``url`` and ``data_root``.
        expected_bytes: expected size of the file on disk, in bytes.
        force: if True, re-download even when the file already exists.

    Returns:
        The absolute path of the verified archive.

    Raises:
        Exception: if the file's size does not match ``expected_bytes``.
    """
    dest_filename = os.path.join(data_root, filename)
    if force or not os.path.exists(dest_filename):
        print('Attempting to download:', filename)
        filename, _ = urlretrieve(url + filename, dest_filename,
                                  reporthook=download_progress_hook)
        print('\nDownload Complete!')
    statinfo = os.stat(dest_filename)
    if statinfo.st_size == expected_bytes:
        print('Found and verified', dest_filename)
    else:
        raise Exception(
            'Failed to verify ' + dest_filename +
            '. Can you get to it with a browser?')
    return dest_filename


train_filename = maybe_download('notMNIST_large.tar.gz', 247336696)
test_filename = maybe_download('notMNIST_small.tar.gz', 8458043)

num_classes = 10
np.random.seed(133)


def maybe_extract(filename, force=False):
    """Extract a .tar.gz archive into ``data_root`` unless already extracted.

    Args:
        filename: path to the ``.tar.gz`` archive.
        force: if True, re-extract even when the target directory exists.

    Returns:
        Sorted list of the per-class letter directories found under the
        extracted root.

    Raises:
        Exception: if the number of class folders != ``num_classes``.
    """
    root = os.path.splitext(os.path.splitext(filename)[0])[0]  # remove .tar.gz
    if os.path.isdir(root) and not force:
        # You may override by setting force=True.
        print('%s already present - Skipping extraction of %s.' % (root, filename))
    else:
        print('Extracting data for %s. This may take a while. Please wait.' % root)
        sys.stdout.flush()
        # Context manager guarantees the archive is closed even if
        # extractall raises (the original leaked the handle on error).
        with tarfile.open(filename) as tar:
            tar.extractall(data_root)
    # List comprehension with a filter: keep only subdirectories of root.
    # See http://www.secnetix.de/olli/Python/list_comprehensions.hawk
    data_folders = [
        os.path.join(root, d) for d in sorted(os.listdir(root))
        if os.path.isdir(os.path.join(root, d))]
    if len(data_folders) != num_classes:
        raise Exception(
            'Expected %d folders, one per class. Found %d instead.'
            % (num_classes, len(data_folders)))
    print(data_folders)
    return data_folders


train_folders = maybe_extract(train_filename)
test_folders = maybe_extract(test_filename)

########################
# Pickling
image_size = 28      # Pixel width and height.
pixel_depth = 255.0  # Number of levels per pixel.
def load_letter(folder, min_num_images):#used by maybe_pickle look at first; folder is a letter folder """Load the data for a single letter label.""" image_files = os.listdir(folder)#returns a list of files in the letter directory dataset = np.ndarray(shape=(len(image_files), image_size, image_size), dtype=np.float32)### an array of all the files in the directory; num_images = 0 for image in image_files: image_file = os.path.join(folder, image) try: image_data = (ndimage.imread(image_file).astype(float) - pixel_depth / 2) / pixel_depth #normalizing range to -1