tensorboard.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. import fastai
  2. from fastai import *
  3. from fastai.vision import *
  4. from fastai.callbacks import *
  5. from fastai.vision.gan import *
  6. from fastai.core import *
  7. import statistics
  8. from .images import ModelImageSet
  9. import torchvision.utils as vutils
  10. from tensorboardX import SummaryWriter
  11. class ModelGraphVisualizer():
  12. def __init__(self):
  13. return
  14. def write_model_graph_to_tensorboard(self, md:DataBunch, model:nn.Module, tbwriter:SummaryWriter):
  15. try:
  16. x,y = md.one_batch(DatasetType.Valid, detach=False, denorm=False)
  17. tbwriter.add_graph(model, x)
  18. except Exception as e:
  19. print(("Failed to generate graph for model: {0}. Note that there's an outstanding issue with "
  20. + "scopes being addressed here: https://github.com/pytorch/pytorch/pull/12400").format(e))
  21. class ModelHistogramVisualizer():
  22. def __init__(self):
  23. return
  24. def write_tensorboard_histograms(self, model:nn.Module, iter_count:int, tbwriter:SummaryWriter):
  25. for name, param in model.named_parameters():
  26. tbwriter.add_histogram('/weights/' + name, param, iter_count)
  27. class ModelStatsVisualizer():
  28. def __init__(self):
  29. return
  30. def write_tensorboard_stats(self, model:nn.Module, iter_count:int, tbwriter:SummaryWriter):
  31. gradients = [x.grad for x in model.parameters() if x.grad is not None]
  32. gradient_nps = [to_np(x.data) for x in gradients]
  33. if len(gradients) == 0:
  34. return
  35. avg_norm = sum(x.data.norm() for x in gradients)/len(gradients)
  36. tbwriter.add_scalar('/gradients/avg_norm', avg_norm, iter_count)
  37. median_norm = statistics.median(x.data.norm() for x in gradients)
  38. tbwriter.add_scalar('/gradients/median_norm', median_norm, iter_count)
  39. max_norm = max(x.data.norm() for x in gradients)
  40. tbwriter.add_scalar('/gradients/max_norm', max_norm, iter_count)
  41. min_norm = min(x.data.norm() for x in gradients)
  42. tbwriter.add_scalar('/gradients/min_norm', min_norm, iter_count)
  43. num_zeros = sum((np.asarray(x)==0.0).sum() for x in gradient_nps)
  44. tbwriter.add_scalar('/gradients/num_zeros', num_zeros, iter_count)
  45. avg_gradient= sum(x.data.mean() for x in gradients)/len(gradients)
  46. tbwriter.add_scalar('/gradients/avg_gradient', avg_gradient, iter_count)
  47. median_gradient = statistics.median(x.data.median() for x in gradients)
  48. tbwriter.add_scalar('/gradients/median_gradient', median_gradient, iter_count)
  49. max_gradient = max(x.data.max() for x in gradients)
  50. tbwriter.add_scalar('/gradients/max_gradient', max_gradient, iter_count)
  51. min_gradient = min(x.data.min() for x in gradients)
  52. tbwriter.add_scalar('/gradients/min_gradient', min_gradient, iter_count)
  53. class ImageGenVisualizer():
  54. def output_image_gen_visuals(self, learn:Learner, trn_batch:Tuple, val_batch:Tuple, iter_count:int, tbwriter:SummaryWriter):
  55. self._output_visuals(learn=learn, batch=val_batch, iter_count=iter_count, tbwriter=tbwriter, ds_type=DatasetType.Valid)
  56. self._output_visuals(learn=learn, batch=trn_batch, iter_count=iter_count, tbwriter=tbwriter, ds_type=DatasetType.Train)
  57. def _output_visuals(self, learn:Learner, batch:Tuple, iter_count:int, tbwriter:SummaryWriter, ds_type: DatasetType):
  58. image_sets = ModelImageSet.get_list_from_model(learn=learn, batch=batch, ds_type=ds_type)
  59. self._write_tensorboard_images(image_sets=image_sets, iter_count=iter_count, tbwriter=tbwriter, ds_type=ds_type)
  60. def _write_tensorboard_images(self, image_sets:[ModelImageSet], iter_count:int, tbwriter:SummaryWriter, ds_type: DatasetType):
  61. orig_images = []
  62. gen_images = []
  63. real_images = []
  64. for image_set in image_sets:
  65. orig_images.append(image_set.orig.px)
  66. gen_images.append(image_set.gen.px)
  67. real_images.append(image_set.real.px)
  68. prefix = str(ds_type)
  69. tbwriter.add_image(prefix + ' orig images', vutils.make_grid(orig_images, normalize=True), iter_count)
  70. tbwriter.add_image(prefix + ' gen images', vutils.make_grid(gen_images, normalize=True), iter_count)
  71. tbwriter.add_image(prefix + ' real images', vutils.make_grid(real_images, normalize=True), iter_count)
  72. #--------Below are what you actually want to use, in practice----------------#
  73. class ModelTensorboardStatsWriter():
  74. def __init__(self, base_dir: Path, module: nn.Module, name: str, stats_iters: int=10):
  75. self.base_dir = base_dir
  76. self.name = name
  77. log_dir = base_dir/name
  78. self.tbwriter = SummaryWriter(log_dir=str(log_dir))
  79. self.hook = module.register_forward_hook(self.forward_hook)
  80. self.stats_iters = stats_iters
  81. self.iter_count = 0
  82. self.model_vis = ModelStatsVisualizer()
  83. def forward_hook(self, module:nn.Module, input, output):
  84. self.iter_count += 1
  85. if self.iter_count % self.stats_iters == 0:
  86. self.model_vis.write_tensorboard_stats(module, iter_count=self.iter_count, tbwriter=self.tbwriter)
  87. def close(self):
  88. self.tbwriter.close()
  89. self.hook.remove()
  90. class GANTensorboardWriter(LearnerCallback):
  91. def __init__(self, learn:Learner, base_dir:Path, name:str, stats_iters:int=10,
  92. visual_iters:int=200, weight_iters:int=1000):
  93. super().__init__(learn=learn)
  94. self.base_dir = base_dir
  95. self.name = name
  96. log_dir = base_dir/name
  97. self.tbwriter = SummaryWriter(log_dir=str(log_dir))
  98. self.stats_iters = stats_iters
  99. self.visual_iters = visual_iters
  100. self.weight_iters = weight_iters
  101. self.img_gen_vis = ImageGenVisualizer()
  102. self.graph_vis = ModelGraphVisualizer()
  103. self.weight_vis = ModelHistogramVisualizer()
  104. self.data = None
  105. def on_batch_end(self, iteration, metrics, **kwargs):
  106. if iteration==0:
  107. return
  108. trainer = self.learn.gan_trainer
  109. generator = trainer.generator
  110. critic = trainer.critic
  111. recorder = trainer.recorder
  112. #one_batch is extremely slow. this is an optimization
  113. update_batches = self.data is not self.learn.data
  114. if update_batches:
  115. self.data = self.learn.data
  116. self.trn_batch = self.learn.data.one_batch(DatasetType.Train, detach=False, denorm=False)
  117. self.val_batch = self.learn.data.one_batch(DatasetType.Valid, detach=False, denorm=False)
  118. if iteration % self.stats_iters == 0:
  119. if len(recorder.losses) > 0:
  120. trn_loss = to_np((recorder.losses[-1:])[0])
  121. self.tbwriter.add_scalar('/loss/trn_loss', trn_loss, iteration)
  122. if len(recorder.val_losses) > 0:
  123. val_loss = (recorder.val_losses[-1:])[0]
  124. self.tbwriter.add_scalar('/loss/val_loss', val_loss, iteration)
  125. #TODO: Figure out how to do metrics here and gan vs critic loss
  126. #values = [met[-1:] for met in recorder.metrics]
  127. if iteration % self.visual_iters == 0:
  128. gen_mode = trainer.gen_mode
  129. trainer.switch(gen_mode=True)
  130. self.img_gen_vis.output_image_gen_visuals(learn=self.learn, trn_batch=self.trn_batch, val_batch=self.val_batch,
  131. iter_count=iteration, tbwriter=self.tbwriter)
  132. trainer.switch(gen_mode=gen_mode)
  133. if iteration % self.weight_iters == 0:
  134. self.weight_vis.write_tensorboard_histograms(model=generator, iter_count=iteration, tbwriter=self.tbwriter)
  135. self.weight_vis.write_tensorboard_histograms(model=critic, iter_count=iteration, tbwriter=self.tbwriter)
  136. class ImageGenTensorboardWriter(LearnerCallback):
  137. def __init__(self, learn:Learner, base_dir:Path, name:str, stats_iters:int=25,
  138. visual_iters:int=200, weight_iters:int=25):
  139. super().__init__(learn=learn)
  140. self.base_dir = base_dir
  141. self.name = name
  142. log_dir = base_dir/name
  143. self.tbwriter = SummaryWriter(log_dir=str(log_dir))
  144. self.stats_iters = stats_iters
  145. self.visual_iters = visual_iters
  146. self.weight_iters = weight_iters
  147. self.iter_count = 0
  148. self.weight_vis = ModelHistogramVisualizer()
  149. self.img_gen_vis = ImageGenVisualizer()
  150. self.data = None
  151. def on_batch_end(self, iteration, last_loss, metrics, **kwargs):
  152. if iteration==0:
  153. return
  154. #one_batch is extremely slow. this is an optimization
  155. update_batches = self.data is not self.learn.data
  156. if update_batches:
  157. self.data = self.learn.data
  158. self.trn_batch = self.learn.data.one_batch(DatasetType.Train, detach=False, denorm=False)
  159. self.val_batch = self.learn.data.one_batch(DatasetType.Valid, detach=False, denorm=False)
  160. if iteration % self.stats_iters == 0:
  161. trn_loss = to_np(last_loss)
  162. self.tbwriter.add_scalar('/loss/trn_loss', trn_loss, iteration)
  163. if iteration % self.visual_iters == 0:
  164. self.img_gen_vis.output_image_gen_visuals(learn=self.learn, trn_batch=self.trn_batch, val_batch=self.val_batch,
  165. iter_count=iteration, tbwriter=self.tbwriter)
  166. if iteration % self.weight_iters == 0:
  167. self.weight_vis.write_tensorboard_histograms(model=self.learn.model, iter_count=iteration, tbwriter=self.tbwriter)
  168. def on_epoch_end(self, iteration, metrics, last_metrics, **kwargs):
  169. #TODO: Not a fan of this indexing but...what to do?
  170. val_loss = last_metrics[0]
  171. self.tbwriter.add_scalar('/loss/val_loss', val_loss, iteration)