PyTorch training-loop utilities.

This module contains routines for CNN training. Please point out anything you find bad or ugly. Thank you.

class Trainer(object):
    """Drives the train/validate loop for a PyTorch model.

    Tracks per-iteration and per-epoch loss/metric averages, logs them to
    TensorBoard (SummaryWriter) and a file logger, steps a ReduceLROnPlateau
    scheduler on the epoch-average validation loss, and checkpoints the model
    whenever that loss improves.
    """

    def __init__(self, criterion,
                 metric,
                 optimizer,
                 model_name,
                 model,
                 base_checkpoint_name=None,
                 device=0,
                 dummy_input=None):
        """
        :param criterion: loss function, callable as criterion(output, target)
        :param metric: evaluation metric, callable as metric(output, target)
        :param optimizer: torch optimizer wrapping the model's parameters
        :param model_name: name used for the log file and default checkpoints
        :param model: the torch.nn.Module being trained
        :param base_checkpoint_name: checkpoint file prefix; defaults to
            model_name when None
        :param device: device that inputs/targets are moved to
        :param dummy_input: optional example input; when given, the model
            graph is plotted to TensorBoard at construction time
        """
        if base_checkpoint_name is None:
            self.base_checkpoint_name = model_name
        else:
            self.base_checkpoint_name = base_checkpoint_name

        self.metric = metric
        self.criterion = criterion
        self.optimizer = optimizer
        self.scheduler = ReduceLROnPlateau(optimizer, patience=2, verbose=True)
        self.best_loss = np.inf
        self.model_name = model_name
        self.device = device
        self.epoch_num = 0
        self.model = model

        # History lists consumed by update_train_epoch_stats(); previously
        # they were never initialized, so that method raised AttributeError.
        self.epoch_train_losses = []
        self.epoch_train_metrics = []

        self.logger = create_logger(model_name + '.log')
        self.writer = SummaryWriter(log_dir='/tmp/runs/')
        # Per-tag iteration counters used as TensorBoard global_step values.
        self.counters = {}

        if dummy_input is not None:
            self._plot_graph(dummy_input)

    @staticmethod
    def save_checkpoint(state, name):
        """Persist a state dict (or any serializable state) to disk."""
        print('save state at', name)
        torch.save(state, name)

    def get_checkpoint_name(self, loss):
        """Return the checkpoint path for the current best model.

        NOTE(review): `loss` is currently unused; it is kept so existing
        callers (see validate()) keep working — confirm before removing.
        """
        return self.base_checkpoint_name + '_best.pth.tar'

    def is_best(self, avg_loss):
        """Return True and record avg_loss if it beats the best loss so far."""
        best = avg_loss < self.best_loss
        if best:
            self.best_loss = avg_loss

        return best

    def validate(self, val_loader):
        """Run one validation epoch.

        :param val_loader: iterable of (input, target) batches
        :return: (average loss, average metric) over the epoch
        """
        batch_time = AverageMeter()
        losses = AverageMeter()
        metrics = AverageMeter()

        self.model.eval()

        end = time.time()
        tqdm_val_loader = tqdm(enumerate(val_loader))
        for batch_idx, (input, target) in tqdm_val_loader:
            with torch.no_grad():
                input_var = input.to(self.device)
                target_var = target.to(self.device)

                output = self.model(input_var)

                loss = self.criterion(output, target_var)
                loss_scalar = loss.item()
                losses.update(loss_scalar)
                metric_val = self.metric(output, target_var)
                metrics.update(metric_val)
                tqdm_val_loader.set_description('loss value: %s, metric value: %s' %
                                                (str(loss_scalar), str(metric_val)))

                batch_time.update(time.time() - end)

                self._log_data(input, target, output, 'val_it_data')
                self._log_metric({
                    'metric': metric_val,
                    'loss': loss_scalar,
                    'batch_time': time.time() - end
                }, 'val_it_metric')

                end = time.time()

        self._log_metric({
            'metric': metrics.avg,
            'loss': losses.avg,
            'batch_time': batch_time.avg
        }, 'val_epoch_metric')

        # Drive the LR schedule and checkpointing off the epoch-average loss
        # (the per-batch `loss` tensor is out of scope / wrong here).
        self.scheduler.step(losses.avg)

        if self.is_best(losses.avg):
            self.save_checkpoint(self.model.state_dict(), self.get_checkpoint_name(losses.avg))

        self.epoch_num += 1
        return losses.avg, metrics.avg

    def update_train_epoch_stats(self, loss, metric):
        """Append epoch-level training stats to the history lists."""
        self.epoch_train_losses.append(loss)
        self.epoch_train_metrics.append(metric)

    def train(self, train_loader):
        """Run one training epoch.

        :param train_loader: iterable of (input, target) batches
        :return: (average loss, average metric) over the epoch
        """
        batch_time, data_time, losses, metrics = (AverageMeter(), AverageMeter(),
                                                  AverageMeter(), AverageMeter())

        self.model.train()

        end = time.time()
        train_tqdm_iterator = tqdm(enumerate(train_loader))
        for batch_idx, (input, target) in train_tqdm_iterator:
            data_time.update(time.time() - end)

            input_var = input.to(self.device)
            target_var = target.to(self.device)

            output = self.model(input_var)

            loss = self.criterion(output, target_var)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            with torch.no_grad():
                loss_scalar = loss.item()
                losses.update(loss_scalar)
                metric_val = self.metric(output, target_var)  # TODO: add an output dimension assertion
                metrics.update(metric_val)
                train_tqdm_iterator.set_description('train loss: %s, train metric: %s' %
                                                    (str(loss_scalar), str(metric_val)))

            batch_time.update(time.time() - end)
            end = time.time()

            self._log_data(input, target, output, 'train_it_data')
            self._log_metric({
                'metric': metric_val,
                'loss': loss_scalar,
                'batch_time': time.time() - end
            }, 'train_it_metric')

        self._log_metric({
            'metric': metrics.avg,
            'loss': losses.avg,
            'batch_time': batch_time.avg
        }, 'train_epoch_metric')
        return losses.avg, metrics.avg

    def _log_data(self, input, target, output, tag):
        """Log the first three channels of the input batch as an image.

        NOTE(review): assumes input is a (batch, channels>=3, H, W) tensor —
        confirm against the data loader.
        """
        it = self._get_it(tag)
        self.writer.add_image(tag, input[:, 0:3, :, :], it)

    def _log_metric(self, metrics_dict, tag):
        """Write each value to TensorBoard and emit one debug log line."""
        it = self._get_it(tag)

        result = 'tag: ' + tag
        for k in metrics_dict:
            self.writer.add_scalar(tag + '_' + k, metrics_dict[k], it)
            result += ', ' + k + '=' + str(metrics_dict[k])

        result += ', iteration ' + str(it)

        self.logger.debug(result)

    def _get_it(self, tag):
        """Return the current iteration index for `tag` and advance it.

        Fixes the original off-by-one where the first two calls for a new
        tag both returned 0 (the counter was stored but not advanced).
        """
        it = self.counters.get(tag, 0)
        self.counters[tag] = it + 1
        return it

    def _plot_graph(self, dummy_input):
        """Add the model graph to TensorBoard using an example input."""
        self.writer.add_graph(self.model, dummy_input)