'''
Created on Aug 6, 2017

@author: bob
'''
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # to avoid warnings about compilation

import numpy as np
import tensorflow as tf

rng = np.random

N = 4      # training sample size
feats = 2  # number of input variables
inps = np.array([0, 0, 0, 1, 1, 0, 1, 1])
#===============================================================================
# feats = 3  # number of input variables
# inps = np.array([0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1])  # adding an AND feature to the XOR inputs makes it trainable
#===============================================================================
inps = np.reshape(inps, (N, feats))
inps = inps.astype(np.float32)
print("original inputs \n", inps)  # we will multiply on the right in TensorFlow

#outps = np.array([0, 1, 1, 0])  # exclusive or
outps = np.array([0, 1, 1, 1])  # AND and OR work, but exclusive or does not
print("original outps \n", outps)
print("shape of array \n", outps.shape)
#outps = np.reshape(outps, (4, 1))
#print("shape of array 4,1 \n", outps.shape)  # doesn't work in the program
outps = np.reshape(outps, (N, 1))
outps = outps.astype(np.float32)
print("reshaped outps \n", outps)
print("shape of array AFTER reshape \n", outps.shape)

# the dataset: D = (input_values, target_class)
training_steps = 1000

#===============================================================================
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Tensorflow graph
#===============================================================================
# Declare symbolic variables
w = tf.Variable(tf.truncated_normal([feats, 1]), name='weights')  # default dtype is tf.float32
b = tf.Variable(0, dtype=tf.float32, name="biases")
x = tf.placeholder(dtype=tf.float32, shape=(N, feats), name='x')  # all N inputs fed at once
y = tf.placeholder(dtype=tf.float32, shape=(N, 1), name='y')      # the corresponding correct answers

# Construct the expression graph
p_1 = 1 / (1 + tf.exp(-tf.matmul(x, w) - b))  # probability that target = 1; an N-by-1 matrix, one probability per row
prediction = p_1 > 0.5                        # the prediction, thresholded
# Cross-entropy loss: the negative log of the likelihood, so minimizing it maximizes the likelihood.
# Note: if y is not reshaped to (N, 1), broadcasting turns xent into a 4x4 matrix and ruins the cost.
xent = -y * tf.log(p_1) - (1 - y) * tf.log(1 - p_1)
cost = tf.reduce_mean(xent) + 0.01 * tf.reduce_mean(tf.square(w))  # the cost to minimize, with L2 regularization
#optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(cost)
derivW = tf.gradients(cost, w, name='derivW')  # gradient of the cost w.r.t. the weights; tf.gradients returns a list
derivb = tf.gradients(cost, b, name='derivb')  # gradient of the cost w.r.t. the bias
newW = w - 0.1 * derivW[0]  # take element [0] to get the gradient tensor out of the list
trainw = tf.assign(w, newW)
newb = b - 0.1 * derivb[0]
trainb = tf.assign(b, newb)
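# Aside (a sketch, not used below): TensorFlow 1.x has a built-in op that computes
# the same cross-entropy directly from the raw logits, which is numerically stabler
# than forming p_1 and taking logs by hand. This reuses x, w, b, and y from above.
logits_sketch = tf.matmul(x, w) + b  # the pre-sigmoid activations
xent_stable = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits_sketch)
cost_stable = tf.reduce_mean(xent_stable) + 0.01 * tf.reduce_mean(tf.square(w))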
#===============================================================================
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# End of creating the graph
#===============================================================================
# Compile
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # reset values to their initial (wrong) state

# Train
for i in range(training_steps):
    feed_dict = {x: inps, y: outps}
    #_, ww, bb, p1, xentropy = sess.run([optimizer, w, b, p_1, xent], feed_dict=feed_dict)
    # Fetch trainw and trainb so that the assigns actually run
    bb, ww, dw, db, tw, tb = sess.run([b, w, derivW, derivb, trainw, trainb], feed_dict=feed_dict)
    #print("\n\n new w b \n", ww, " \n\n ", bb)
    #print("\n\n output ", p1, "shape ", p1.shape)
    #print("\n\n entropy", xentropy)
    #print("\n\n\n new dw db \n", dw, " \n \n", db)

print("\n\n\n\n\n Final model: b values \n\n")
print(bb)
print("target values for D:")
print(outps)
print("prediction on D:")
feed_dict = {x: inps}
pred = sess.run(prediction, feed_dict=feed_dict)
print(pred)
print("prediction minus target; a zero means that sample was learned correctly")
print(pred.astype(int) - outps)

"""
Homework
1. Change the cost to minimize an L2 difference instead. This is ordinary regression.
   Which logical expressions can you learn? (A sketch follows this docstring.)
2. See if regularization makes any difference.
3. Add a couple of random digits to the input for the XOR of the first two digits.
   See if you can fit this situation with 4 training samples. Can you test whether it
   generalizes? What if you have more than 4 training samples?
   (Look at my Theano/XORRandomExtraDimensions.py)
4. Can you look at the derivatives to tell when you are close to a minimum and stop
   the routine? (A sketch follows this docstring.)
"""
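# Sketch for homework 1 (one possible reading of the exercise, not a definitive
# solution): replace the cross-entropy with a squared-error (L2) cost and rebuild
# the hand-rolled gradient-descent updates the same way as above.
cost_l2 = tf.reduce_mean(tf.square(p_1 - y)) + 0.01 * tf.reduce_mean(tf.square(w))
trainw_l2 = tf.assign(w, w - 0.1 * tf.gradients(cost_l2, w)[0])
trainb_l2 = tf.assign(b, b - 0.1 * tf.gradients(cost_l2, b)[0])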
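# Sketch for homework 4 (the tolerance value is an assumption; tune it): stop once
# the gradient norm is small, since the derivatives approach zero near a minimum.
# This reuses derivW, derivb, trainw, trainb, and sess from above.
tol = 1e-4  # assumed stopping threshold
for i in range(training_steps):
    dw_val, db_val, _, _ = sess.run([derivW, derivb, trainw, trainb],
                                    feed_dict={x: inps, y: outps})
    grad_norm = np.sqrt(np.sum(dw_val[0] ** 2) + db_val[0] ** 2)
    if grad_norm < tol:
        print("close to a minimum at step", i, "gradient norm =", grad_norm)
        break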