python – Can a neural network train against the loss function from past iterations?

I have a neural network that I am training on a loss function (call it loss1). As the optimizer iterates, the loss decreases as expected. Is it possible to create an additional loss function (call it loss2) that is based on some combination of the loss1 values from previous iterations? For example, I may want to ensure that the loss1 values from the past 100 iterations follow a symmetric distribution. This would require me to store loss1 from the past 100 iterations (which I can do) and then have my optimizers train on some new function of those past loss1 values (which I cannot do). Even more simply, I have so far been unable to write a loss2 that includes past values of loss1 in any way.

Below is a minimal example in which I construct and train a neural network on a loss function and store the values of the loss function in a list (loss1_log). I now want to train on the values stored in this list. Is it even computationally possible for the optimizer to train on these past values of loss1?

import numpy as np 
import tensorflow as tf

end_it = 1000  # number of iterations
layers = [2, 20, 20, 20, 1]  # layer widths: 2 inputs (x, y) -> 1 output (v1)

# Generate training data.
# Each array is built as a single row of shape (1, len_data) so that its
# transpose below is a column of shape (len_data, 1).
len_data = 10000
x_x = np.array([np.linspace(0., 1., len_data)])
x_y = np.array([np.linspace(0., 1., len_data)])
y_true = np.array([np.linspace(-0.2, 0.2, len_data)])

# Random permutation of all sample indices (drawn without replacement).
N_train = int(len_data)
idx = np.random.choice(len_data, N_train, replace=False)

# Column vectors of shape (N_train, 1), all shuffled with the same index order.
x_train = x_x.T[idx, :]
y_train = x_y.T[idx, :]
v1_train = y_true.T[idx, :]

# Mini-batch size: 1% of the training set.
sample_batch_size = int(0.01 * N_train)

import logging

class NeuralNet:
    """Fully connected tanh network mapping (x, y) to v1, using the TF1 graph API.

    The constructor builds the whole graph: placeholders, the network output,
    the losses, an L-BFGS-B optimizer (via ``ScipyOptimizerInterface``) and an
    Adam training op. ``train`` then runs mini-batch updates and records the
    value of ``loss1`` after every iteration in ``self.loss1_log``.

    Relies on two module-level names: ``sample_batch_size`` (read by
    ``train``) and ``Nfeval`` (read and incremented by ``callback``).
    """

    def __init__(self, x, y, v1, layers):
        """Build the graph and initialise all variables.

        Args:
            x: 2-D array with one column, first network input.
            y: 2-D array with one column, second network input.
            v1: 2-D array of target values, one row per sample.
            layers: sequence of layer widths, input width first.
        """
        X = np.concatenate((x, y), 1)
        # Per-column bounds; neural_net uses them to rescale inputs to [-1, 1].
        self.lb = X.min(0)
        self.ub = X.max(0)
        self.X = X
        self.x = X[:, 0:1]
        self.y = X[:, 1:2]
        self.v1 = v1
        self.layers = layers
        self.weights_v1, self.biases_v1 = self.initialize_NN(layers)
        self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=False))
        self.x_tf = tf.placeholder(tf.float32, shape=[None, self.x.shape[1]])
        self.y_tf = tf.placeholder(tf.float32, shape=[None, self.y.shape[1]])
        self.v1_tf = tf.placeholder(tf.float32, shape=[None, self.v1.shape[1]])
        self.v1_pred = self.net(self.x_tf, self.y_tf)
        # Python-side history of fetched loss1 values, appended to in train().
        self.loss1_log = []
        # Mean squared error between the fed targets and the prediction.
        self.loss1 = tf.reduce_mean(tf.square(self.v1_tf - self.v1_pred))
        self.loss2 = 0.0*self.loss1  # ideally tf.reduce_mean(tf.square(some function of self.loss1_log)), e.g. tf.reduce_mean(self.loss1_log[-100:-1] - np.mean(self.loss1_log[-100:-1]))
        self.loss = self.loss1 + self.loss2
        self.optimizer = tf.contrib.opt.ScipyOptimizerInterface(
            self.loss,
            method='L-BFGS-B',
            options={'maxiter': 50,
                     'maxfun': 50000,
                     'maxcor': 50,
                     'maxls': 50,
                     'ftol': 1.0 * np.finfo(float).eps})
        self.optimizer_Adam = tf.train.AdamOptimizer()
        self.train_op_Adam_v1 = self.optimizer_Adam.minimize(
            self.loss, var_list=self.weights_v1 + self.biases_v1)
        # Variables must be initialised in the session before any run() call.
        init = tf.global_variables_initializer()
        self.sess.run(init)

    def initialize_NN(self, layers):
        """Create one weight matrix and one bias row per consecutive layer pair.

        Returns:
            (weights, biases): two lists of ``tf.Variable`` with
            ``len(layers) - 1`` entries each.
        """
        weights = []
        biases = []
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            W = self.xavier_init(size=[n_in, n_out])
            b = tf.Variable(tf.zeros([1, n_out], dtype=tf.float32), dtype=tf.float32)
            weights.append(W)
            biases.append(b)
        return weights, biases

    def xavier_init(self, size):
        """Return a float32 variable of shape ``size`` with Xavier-scaled truncated-normal values."""
        in_dim, out_dim = size[0], size[1]
        xavier_stddev = np.sqrt(2/(in_dim + out_dim))
        return tf.Variable(tf.truncated_normal([in_dim, out_dim], stddev=xavier_stddev), dtype=tf.float32)

    def neural_net(self, X, weights, biases):
        """Forward pass: rescale X, apply tanh hidden layers, then a linear output layer."""
        # Map each input column from [lb, ub] to [-1, 1].
        H = 2.0*(X - self.lb)/(self.ub - self.lb) - 1.0
        for W, b in zip(weights[:-1], biases[:-1]):
            H = tf.tanh(tf.add(tf.matmul(H, W), b))
        # Last layer has no activation.
        Y = tf.add(tf.matmul(H, weights[-1]), biases[-1])
        return Y

    def net(self, x, y):
        """Return the first output column of the network evaluated on inputs (x, y)."""
        v1_out = self.neural_net(tf.concat([x, y], 1), self.weights_v1, self.biases_v1)
        v1 = v1_out[:, 0:1]
        return v1

    def callback(self, loss):
        """Print the current loss and bump the module-level ``Nfeval`` counter.

        Passed to the L-BFGS-B optimizer as ``loss_callback``; ``Nfeval`` must
        be defined at module level before training starts.
        """
        global Nfeval
        print(str(Nfeval)+' - Loss in loop: %.3e' % (loss))
        Nfeval += 1

    def fetch_minibatch(self, x_in, y_in, v1_in, N_train_sample):
        """Draw ``N_train_sample`` distinct rows, using the same rows from all three arrays.

        Returns:
            (x_batch, y_batch, v1_batch), each with ``N_train_sample`` rows.
        """
        idx_batch = np.random.choice(len(x_in), N_train_sample, replace=False)
        x_batch = x_in[idx_batch, :]
        y_batch = y_in[idx_batch, :]
        v1_batch = v1_in[idx_batch, :]
        return x_batch, y_batch, v1_batch

    def train(self, end_it):
        """Run ``end_it`` training iterations and log ``loss1`` after each one.

        Each iteration draws a fresh mini-batch of ``sample_batch_size`` rows,
        takes one Adam step on it, runs the L-BFGS-B optimizer on the same
        batch, and appends the resulting ``loss1`` value to ``self.loss1_log``.
        """
        for _ in range(end_it):
            # Fetch residual mini-batch
            x_res_batch, y_res_batch, v1_res_batch = self.fetch_minibatch(
                self.x, self.y, self.v1, sample_batch_size)
            tf_dict = {self.x_tf: x_res_batch, self.y_tf: y_res_batch,
                       self.v1_tf: v1_res_batch}
            self.sess.run(self.train_op_Adam_v1, tf_dict)
            self.optimizer.minimize(self.sess,
                                    feed_dict=tf_dict,
                                    fetches=[self.loss],
                                    loss_callback=self.callback)
            loss1_value = self.sess.run(self.loss1, tf_dict)
            self.loss1_log.append(loss1_value)

    def predict(self, x_star, y_star):
        """Evaluate the network prediction at the given input columns."""
        tf_dict = {self.x_tf: x_star, self.y_tf: y_star}
        v1_star = self.sess.run(self.v1_pred, tf_dict)
        return v1_star

# Evaluation counter read and incremented by NeuralNet.callback, which
# declares it as `global Nfeval`.
Nfeval = 1

# Build the network on the shuffled training columns.
model = NeuralNet(x_train, y_train, v1_train, layers)